## Chapter 18 - DataFrames

In [1]:
import pandas as pd

df = pd.DataFrame({
    'growth': [0.5, 0.7, 1.2],
    'Name': ['Paul', 'George', 'Ringo']
})

print(df)

   growth    Name
0     0.5    Paul
1     0.7  George
2     1.2   Ringo


In [2]:
df.iloc[2]

growth      1.2
Name      Ringo
Name: 2, dtype: object

In [3]:
df['Name']

0      Paul
1    George
2     Ringo
Name: Name, dtype: object

In [4]:
type(df['Name'])

pandas.core.series.Series

In [5]:
df.Name

0      Paul
1    George
2     Ringo
Name: Name, dtype: object

In [10]:
import numpy as np
np.random.seed(42)
(pd.DataFrame(np.random.randn(10, 3),
              columns=['a', 'b', 'c']))

Unnamed: 0,a,b,c
0,0.496714,-0.138264,0.647689
1,1.52303,-0.234153,-0.234137
2,1.579213,0.767435,-0.469474
3,0.54256,-0.463418,-0.46573
4,0.241962,-1.91328,-1.724918
5,-0.562288,-1.012831,0.314247
6,-0.908024,-1.412304,1.465649
7,-0.225776,0.067528,-1.424748
8,-0.544383,0.110923,-1.150994
9,0.375698,-0.600639,-0.291694


In [11]:
df.axes

[RangeIndex(start=0, stop=3, step=1),
 Index(['growth', 'Name'], dtype='object')]

In [12]:
df.sum(axis=0)

growth                2.4
Name      PaulGeorgeRingo
dtype: object

In [14]:
# df.sum(axis=1)

In [None]:
df.axes[0]

RangeIndex(start=0, stop=3, step=1)

## Chapter 19 - Similarities with Series and DataFrames

In [26]:
import pandas as pd

url = 'https://en.wikipedia.org/wiki/Historical_rankings_of_presidents_of_the_United_States'
pres_dfs = pd.read_html(url, dtype_backend='pyarrow')
df = pres_dfs[3]

In [31]:
df = (df
      .iloc[:-1]
      .rename(columns={'Political party': 'Party'})
      .assign(Party=lambda df_:
              df_
              .Party
              .str.replace(r'\[.*\]', ''))
              .astype('category')
)

df

Unnamed: 0,Seq.,President,Party,Bg,PL,CAb,RC,CAp,HE,L,...,Im,DA,Int,EAb,FPA,LA,IQ,AM,EV,O
0,1,George Washington,Independent,7,18,12,3,3,4,1,...,9,4,2,2,3,1,12,1,3,4
1,2,John Adams,Federalist,4,29,18,26,10,13,23,...,17,22,3,19,12,20,7,15,12,17
2,3,Thomas Jefferson,Democratic-Republican,1,4,6,4,6,16,6,...,3,6,14,5,7,6,1,6,5,5
3,4,James Madison,Democratic-Republican,3,10,11,9,7,12,17,...,8,12,5,14,20,17,2,10,8,6
4,5,James Monroe,Democratic-Republican,9,12,15,8,14,9,9,...,16,8,10,11,2,13,15,7,9,7
5,6,John Quincy Adams,Democratic-Republican,2,34,20,35,16,14,30,...,11,18,4,21,16,26,5,20,21,19
6,7,Andrew Jackson,Democratic,30,2,10,14,27,28,4,...,13,14,23,6,19,5,23,12,13,14
7,8,Martin Van Buren,Democratic,16,13,23,19,24,38,33,...,24,27,29,23,25,27,22,27,24,23
8,9,William Henry Harrison,Whig,24,30,25,31,33,27,42,...,35,36,30,33,39,24,31,33,34,35
9,10,John Tyler,Independent[d],33,42,39,42,39,31,22,...,29,34,33,37,35,36,33,32,36,37


In [33]:
url = 'https://github.com/mattharrison/datasets/raw/master/data/siena2018-pres.csv'
df = pd.read_csv(url, index_col=0, dtype_backend='pyarrow')
df

Unnamed: 0,Seq.,President,Party,Bg,Im,Int,IQ,L,WR,AC,...,PL,RC,CAp,HE,EAp,DA,FPA,AM,EV,O
1,1,George Washington,Independent,7,7,1,10,1,6,2,...,18,1,1,1,1,2,2,1,2,1
2,2,John Adams,Federalist,3,13,4,4,24,14,31,...,28,17,4,13,15,19,13,16,10,14
3,3,Thomas Jefferson,Democratic-Republican,2,2,14,1,8,5,14,...,5,5,7,20,4,6,9,7,5,5
4,4,James Madison,Democratic-Republican,4,6,7,3,16,15,6,...,9,10,6,14,7,11,19,11,8,7
5,5,James Monroe,Democratic-Republican,9,14,11,18,6,16,7,...,12,8,11,9,9,10,5,6,9,8
6,6,John Quincy Adams,Democratic-Republican,1,9,6,5,29,19,24,...,29,29,15,17,18,21,15,14,18,18
7,7,Andrew Jackson,Democratic,37,15,29,28,4,4,38,...,6,16,30,25,25,17,23,20,19,19
8,8,Martin Van Buren,Democratic,23,22,27,25,34,28,20,...,16,23,25,31,26,29,27,24,28,25
9,9,William Henry Harrison,Whig,22,38,28,37,44,32,41,...,36,37,42,41,40,42,44,37,39,39
10,10,John Tyler,Independent,34,33,35,34,22,26,37,...,41,40,38,34,36,36,26,32,36,37


In [34]:
df.dtypes

Seq.         string[pyarrow]
President    string[pyarrow]
Party        string[pyarrow]
Bg            int64[pyarrow]
Im            int64[pyarrow]
Int           int64[pyarrow]
IQ            int64[pyarrow]
L             int64[pyarrow]
WR            int64[pyarrow]
AC            int64[pyarrow]
EAb           int64[pyarrow]
LA            int64[pyarrow]
CAb           int64[pyarrow]
OA            int64[pyarrow]
PL            int64[pyarrow]
RC            int64[pyarrow]
CAp           int64[pyarrow]
HE            int64[pyarrow]
EAp           int64[pyarrow]
DA            int64[pyarrow]
FPA           int64[pyarrow]
AM            int64[pyarrow]
EV            int64[pyarrow]
O             int64[pyarrow]
dtype: object

switching to the code from the author