# Educative - Advanced Pandas: Going Beyond the Basics
## Chapter 3 - Combining DataFrames
___

In [2]:
import pandas as pd
# Show the installed pandas version so readers can tell which release
# produced the outputs captured in this notebook
pd.__version__

'1.5.0'

### Table of Contents
[(1) Primer - pandas Data Structures](#pandas-ds)  
[(2) Concatenate](#concat)  

___
<a class="anchor" id="pandas-ds"></a>
## (1) Primer - pandas Data Structures

### (i) Series

In [3]:
import numpy as np

In [4]:
# Five draws from the standard normal distribution. A seeded Generator is
# used (instead of the unseeded legacy np.random.randn) so the printed
# values are identical on every Restart & Run All.
s1 = pd.Series(np.random.default_rng(seed=42).standard_normal(5))
print(s1)

0    0.471174
1    0.943154
2    0.592795
3    0.020496
4   -0.667922
dtype: float64


In [5]:
s2 = pd.Series(list('abcde'))

# Look up the element stored under index label 3
print(s2.loc[3])

print('-' * 30)

# Overwrite the element stored under index label 4
s2.loc[4] = 'z'
print(s2)

d
------------------------------
0    a
1    b
2    c
3    d
4    z
dtype: object


In [6]:
s3 = pd.Series(range(1, 6))

# Element-wise power: every value is raised to the 2nd power
print(s3.pow(2))

0     1
1     4
2     9
3    16
4    25
dtype: int64


### (ii) DataFrame

In [7]:
# Three Series of increasing length; each is labelled with the leading
# letters of "abcd", so their indexes only partly overlap
d1 = dict(
    s1=pd.Series([1.0, 2.0], index=list("ab")),
    s2=pd.Series([1.0, 2.0, 3.0], index=list("abc")),
    s3=pd.Series([1.0, 2.0, 3.0, 4.0], index=list("abcd")),
)

# Build the DataFrame: the dict keys become the column names, and labels
# missing from a shorter Series show up as NaN in the printed frame
df = pd.DataFrame(data=d1)
print(df)

    s1   s2   s3
a  1.0  1.0  1.0
b  2.0  2.0  2.0
c  NaN  3.0  3.0
d  NaN  NaN  4.0


In [8]:
# Rows at positions 1 and 2 (the end of the slice is excluded)
df.iloc[1:3]

Unnamed: 0,s1,s2,s3
b,2.0,2.0,2.0
c,,3.0,3.0


___
<a class="anchor" id="concat"></a>
## (2) Concatenate

In [9]:
# Load the insured-cars dataset from its relative path.
# Note: this rebinds `df`, replacing the small example frame built earlier.
df = pd.read_csv(filepath_or_buffer='../data/csv/insured_cars.csv')

### (i) Row-wise Concatenation

In [10]:
# Subset A: cars whose model year is 2005 or earlier
df_A = df.loc[df['car_model_year'].le(2005)]
df_A.head(n=5)

Unnamed: 0,insured_car_id,car_make,car_model,car_model_year
38,1G6DE5EG4A0258970,Pontiac,Aztek,2005
39,WAUVT54B42N848231,Toyota,Tundra,2005
40,3GYT4LEF1DG807641,Nissan,Frontier,2005
41,SCFFDAAE4CG206577,Infiniti,Q,2004
42,1FMJU1H55AE867096,Pontiac,Grand Prix,2004


In [11]:
# Subset B: cars whose model year is 2006 or later
df_B = df.loc[df['car_model_year'].ge(2006)]
df_B.head(n=5)

Unnamed: 0,insured_car_id,car_make,car_model,car_model_year
0,1G6KD54Y55U074446,Rolls-Royce,Phantom,2013
1,5GTMNGEE4A8349979,Fiat,Nuova 500,2012
2,SAJWA4DC1AM015816,Rolls-Royce,Phantom,2012
3,WAUUL78E58A745606,Infiniti,G37,2012
4,JTEBU5JRXD5313620,GMC,Sierra 2500,2012


In [12]:
# Stack the two year-based subsets on top of each other. ignore_index=True
# discards their original row labels and renumbers the result from 0.
df_concat = pd.concat(objs=[df_A, df_B], axis='index', ignore_index=True)

In [13]:
# Order the combined rows by model year, oldest first
df_concat.sort_values('car_model_year', ascending=True)

Unnamed: 0,insured_car_id,car_make,car_model,car_model_year
61,WDDEJ7GB8AA509659,Ford,Falcon,1966
60,3C3CFFBR8FT680062,Pontiac,Bonneville,1967
59,WBA3R1C5XFK004020,Chevrolet,Monte Carlo,1973
58,5UXKR2C54E0736382,Lincoln,Continental,1984
56,1HGCR2E54DA590806,Mercedes-Benz,S-Class,1987
...,...,...,...,...
64,SAJWA4DC1AM015816,Rolls-Royce,Phantom,2012
63,5GTMNGEE4A8349979,Fiat,Nuova 500,2012
67,1N6AA0CC8EN395947,Volvo,XC70,2012
65,WAUUL78E58A745606,Infiniti,G37,2012


### (ii) Column-wise Concatenation

In [15]:
# Left half of the table: every row, identifier and make columns only
df_C = df.loc[:, ['insured_car_id', 'car_make']]
df_C.head(n=5)

Unnamed: 0,insured_car_id,car_make
0,1G6KD54Y55U074446,Rolls-Royce
1,5GTMNGEE4A8349979,Fiat
2,SAJWA4DC1AM015816,Rolls-Royce
3,WAUUL78E58A745606,Infiniti
4,JTEBU5JRXD5313620,GMC


In [16]:
# Right half of the table: every row, model and model-year columns only
df_D = df.loc[:, ['car_model', 'car_model_year']]
df_D.head(n=5)

Unnamed: 0,car_model,car_model_year
0,Phantom,2013
1,Nuova 500,2012
2,Phantom,2012
3,G37,2012
4,Sierra 2500,2012


In [19]:
# Place df_C and df_D side by side, aligning rows on their shared index.
# ignore_index is deliberately left at its default: with axis=1 it would
# replace the column names with 0..n-1.
df_concat = pd.concat(objs=[df_C, df_D], axis='columns')

In [20]:
# Preview the first five rows of the column-wise result
df_concat.head(n=5)

Unnamed: 0,insured_car_id,car_make,car_model,car_model_year
0,1G6KD54Y55U074446,Rolls-Royce,Phantom,2013
1,5GTMNGEE4A8349979,Fiat,Nuova 500,2012
2,SAJWA4DC1AM015816,Rolls-Royce,Phantom,2012
3,WAUUL78E58A745606,Infiniti,G37,2012
4,JTEBU5JRXD5313620,GMC,Sierra 2500,2012


### (iii) Additional Parameters

#### Hierarchical Indexing

#### Hierarchical Indexing