# Chapter 7

In [1]:
import pandas as pd

# Location of the zipped vehicles CSV (read directly over HTTP by pandas).
url = "https://github.com/mattharrison/datasets/raw/master/data/vehicles.csv.zip"

# Parse with the pyarrow engine and keep the resulting columns pyarrow-backed.
df = pd.read_csv(
    url,
    engine="pyarrow",
    dtype_backend="pyarrow",
)

# The two columns inspected by the following cells, bound to short names.
highway_mpg = df["highway08"]
city_mpg = df["city08"]

In [2]:
city_mpg

0        19
1         9
2        23
3        10
4        17
         ..
41139    19
41140    20
41141    18
41142    18
41143    16
Name: city08, Length: 41144, dtype: int64[pyarrow]

In [3]:
highway_mpg

0        25
1        14
2        33
3        12
4        23
         ..
41139    26
41140    28
41141    24
41142    24
41143    21
Name: highway08, Length: 41144, dtype: int64[pyarrow]

## Exercises

In [4]:
# 1. Documentation can be accessed using the ? operator in Jupyter notebooks:
#    `obj?` is IPython syntax (not plain Python). Here it shows the help for
#    the `size` property of the `city_mpg` Series.
city_mpg.size?

Type:        property
String form: <property object at 0x7ca274f17600>
Docstring:
Return the number of elements in the underlying data.

Examples
--------
For Series:

>>> s = pd.Series(['Ant', 'Bear', 'Cow'])
>>> s
0     Ant
1    Bear
2     Cow
dtype: object
>>> s.size
3

For Index:

>>> idx = pd.Index([1, 2, 3])
>>> idx
Index([1, 2, 3], dtype='int64')
>>> idx.size
3

In [5]:
# IPython help lookup (`?`) for the `is_monotonic_increasing` property.
city_mpg.is_monotonic_increasing?

Type:        property
String form: <property object at 0x7ca274f176f0>
Docstring:
Return boolean if values in the object are monotonically increasing.

Returns
-------
bool

Examples
--------
>>> s = pd.Series([1, 2, 2])
>>> s.is_monotonic_increasing
True

>>> s = pd.Series([3, 2, 1])
>>> s.is_monotonic_increasing
False

In [6]:
# IPython help lookup (`?`) for the `axes` property.
city_mpg.axes?

Type:        property
String form: <property object at 0x7ca274825e90>
Docstring:   Return a list of the row axis labels.

In [7]:
# IPython help lookup (`?`) for the `dtype` property.
city_mpg.dtype?

Type:        property
String form: <property object at 0x7ca274825c10>
Docstring:
Return the dtype object of the underlying data.

Examples
--------
>>> s = pd.Series([1, 2, 3])
>>> s.dtype
dtype('int64')

In [8]:
# IPython help lookup (`?`) for the `T` (transpose) property.
city_mpg.T?

Type:        property
String form: <property object at 0x7ca274f16fc0>
Docstring:
Return the transpose, which is by definition self.

Examples
--------
For Series:

>>> s = pd.Series(['Ant', 'Bear', 'Cow'])
>>> s
0     Ant
1    Bear
2     Cow
dtype: object
>>> s.T
0     Ant
1    Bear
2     Cow
dtype: object

For Index:

>>> idx = pd.Index([1, 2, 3])
>>> idx.T
Index([1, 2, 3], dtype='int64')

In [9]:
# 2. Build a small pyarrow-backed string Series and count the names that
#    dir() reports on its .str accessor.
import pyarrow as pa

s = pd.Series(
    data=["hi", "bye"],
    dtype=pd.ArrowDtype(pa.string()),
)
len(dir(s.str))

102

In [10]:
# IPython help lookup (`?`) for the `casefold` method of the .str accessor.
s.str.casefold?

Signature: s.str.casefold()
Docstring:
Convert strings in the Series/Index to be casefolded.

Equivalent to :meth:`str.casefold`.

Returns
-------
Series or Index of object

See Also
--------
Series.str.lower : Converts all characters to lowercase.
Series.str.upper : Converts all characters to uppercase.
Series.str.title : Converts first character of each word to uppercase and
    remaining to lowercase.
Series.str.capitalize : Converts first character to uppercase and
    remaining to lowercase.
Series.str.swapcase : Converts uppercase to lowercase and lowercase to
    uppercase.
Series.str.casefold: Removes all case distinctions in the string.

Examples
--------
>>> s = pd.Series(['lower', 'CAPITALS', 'this is a sentence', 'SwApCaSe'])
>>> s
0                 lower
1              CAPITALS
2    this is a sentence
3              SwApCaSe
dtype: object

>>> s.str.lower()
0                 lower
1              capitals
2    this is a sentence
3              swapcase
d

In [11]:
# IPython help lookup (`?`) for the `join` method of the .str accessor.
s.str.join?

Signature: s.str.join(sep: 'str')
Docstring:
Join lists contained as elements in the Series/Index with passed delimiter.

If the elements of a Series are lists themselves, join the content of these
lists using the delimiter passed to the function.
This function is an equivalent to :meth:`str.join`.

Parameters
----------
sep : str
    Delimiter to use between list entries.

Returns
-------
Series/Index: object
    The list entries concatenated by intervening occurrences of the
    delimiter.

Raises
------
AttributeError
    If the supplied Series contains neither strings nor lists.

See Also
--------
str.join : Standard library version of this method.
Series.str.split : Split strings around given separator/delimiter.

Notes
-----
If any of the list items is not a string object, the result of the join
will be `NaN`.

Examples
--------
Example with a list that contains non-string elements.

>>> s = pd.Series([['lion', 'elephant', 'zebra'],
...              

In [12]:
# IPython help lookup (`?`) for the `slice_replace` method of the .str accessor.
s.str.slice_replace?

Signature: s.str.slice_replace(start=None, stop=None, repl=None)
Docstring:
Replace a positional slice of a string with another value.

Parameters
----------
start : int, optional
    Left index position to use for the slice. If not specified (None),
    the slice is unbounded on the left, i.e. slice from the start
    of the string.
stop : int, optional
    Right index position to use for the slice. If not specified (None),
    the slice is unbounded on the right, i.e. slice until the
    end of the string.
repl : str, optional
    String for replacement. If not specified (None), the sliced region
    is replaced with an empty string.

Returns
-------
Series or Index
    Same type as the original object.

See Also
--------
Series.str.slice : Just slicing without replacement.

Examples
--------
>>> s = pd.Series(['a', 'ab', 'abc', 'abdc', 'abcde'])
>>> s
0        a
1       ab
2      abc
3     abdc
4    a

In [13]:
# 3. Build a two-element pyarrow-backed timestamp Series and count the names
#    that dir() reports on its .dt accessor.
#    Note: this rebinds `s`, which previously held the string Series.
import datetime as dt

s = pd.Series(
    data=[dt.datetime(2000, 1, 1), dt.datetime(2023, 12, 31)],
    dtype="timestamp[ns][pyarrow]",
)
len(dir(s.dt))

90

In [14]:
# IPython help lookup (`?`) for the `nanosecond` property of the .dt accessor.
# For this pyarrow-backed Series the help output reports no docstring.
s.dt.nanosecond?

Type:        property
String form: <property object at 0x7ca2747f6c50>
Docstring:   <no docstring>

In [15]:
# IPython help lookup (`?`) for the `tz` property of the .dt accessor.
# For this pyarrow-backed Series the help output reports no docstring.
s.dt.tz?

Type:        property
String form: <property object at 0x7ca2747f6ca0>
Docstring:   <no docstring>

In [16]:
# IPython help lookup (`?`) for the `tz_localize` method of the .dt accessor.
# For this pyarrow-backed Series the help output reports no docstring.
s.dt.tz_localize?

Signature: s.dt.tz_localize(*args, **kwargs)
Docstring: <no docstring>
File:      ~/Documents/proj/effective-pandas-2/pandas-env/lib/python3.13/site-packages/pandas/core/accessor.py
Type:      method