In [1]:
import pandas as pd

#### Create a car inventory

In [2]:
cars = pd.DataFrame({'Car'       : ['Chevrolet Volt', 'Cadillac CTS', 'Jaguar F Type', 
                                    'Hyundai Sonata', 'Ford Fiesta', 'Tesla Model S', 
                                    'Volkswagen GTI', 'Volvo S6'],
                    
                     'Colors'    : ['Red', 'Silver', 'Yellow', 'Black', 'Silver', 
                                    'Red', 'Blue', 'Black'],
                     
                     'CarID'     : [21, 34, 11, 23, 17, 39, 9, 12]
                    })

cars

Unnamed: 0,Car,Colors,CarID
0,Chevrolet Volt,Red,21
1,Cadillac CTS,Silver,34
2,Jaguar F Type,Yellow,11
3,Hyundai Sonata,Black,23
4,Ford Fiesta,Silver,17
5,Tesla Model S,Red,39
6,Volkswagen GTI,Blue,9
7,Volvo S6,Black,12


#### Create an additional inventory

In [3]:
more_cars = pd.DataFrame({'Car'    : ['Audi S6', 'Porsche Panamera'],
                    
                          'Colors' : ['Red', 'Silver'],
                     
                          'CarID'  : [14, 5]
                    })

more_cars

Unnamed: 0,Car,Colors,CarID
0,Audi S6,Red,14
1,Porsche Panamera,Silver,5


#### Concatenate two dataframes
Given the columns are identical, rows from one DataFrame will simply be appended to the other. The index is also appended which is why we end up with duplicate index values.

In [4]:
pd.concat([cars, more_cars])

Unnamed: 0,Car,Colors,CarID
0,Chevrolet Volt,Red,21
1,Cadillac CTS,Silver,34
2,Jaguar F Type,Yellow,11
3,Hyundai Sonata,Black,23
4,Ford Fiesta,Silver,17
5,Tesla Model S,Red,39
6,Volkswagen GTI,Blue,9
7,Volvo S6,Black,12
0,Audi S6,Red,14
1,Porsche Panamera,Silver,5


#### Concatenate, but ignore index values
Here, the DataFrames are concatenated and then the index is set for the concatenated DataFrame

In [5]:
pd.concat([cars, more_cars], ignore_index = True)

Unnamed: 0,Car,Colors,CarID
0,Chevrolet Volt,Red,21
1,Cadillac CTS,Silver,34
2,Jaguar F Type,Yellow,11
3,Hyundai Sonata,Black,23
4,Ford Fiesta,Silver,17
5,Tesla Model S,Red,39
6,Volkswagen GTI,Blue,9
7,Volvo S6,Black,12
8,Audi S6,Red,14
9,Porsche Panamera,Silver,5


#### Create a DataFrame containing car orders
The CarID field maps to the CarID in the cars inventory

In [6]:
car_orders = pd.DataFrame({'OrderID'  :    [201, 202, 203, 204, 205, 206, 207, 208],
                        
                           'Quantity' :    [1, 2, 1, 8, 5, 10, 3, 1],
                           
                           'DownPayment' : ['$5,959', '$19,500', '$33,102', '$5,700', 
                                            '$9,201', '$78,950', '$21,310', '$11,321'], 
                           
                           'CarID'       : [23, 39, 17, 9, 12, 21, 34, 11]
                         })

car_orders

Unnamed: 0,OrderID,Quantity,DownPayment,CarID
0,201,1,"$5,959",23
1,202,2,"$19,500",39
2,203,1,"$33,102",17
3,204,8,"$5,700",9
4,205,5,"$9,201",12
5,206,10,"$78,950",21
6,207,3,"$21,310",34
7,208,1,"$11,321",11


#### Concatenate the car inventory with the car orders data
The rows of the car_orders will be appended to the cars DataFrame. Since only the CarID is common, the remaining fields will be populated with NaN values

In [7]:
pd.concat([cars, car_orders])

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=True'.


  """Entry point for launching an IPython kernel.


Unnamed: 0,Car,CarID,Colors,DownPayment,OrderID,Quantity
0,Chevrolet Volt,21,Red,,,
1,Cadillac CTS,34,Silver,,,
2,Jaguar F Type,11,Yellow,,,
3,Hyundai Sonata,23,Black,,,
4,Ford Fiesta,17,Silver,,,
5,Tesla Model S,39,Red,,,
6,Volkswagen GTI,9,Blue,,,
7,Volvo S6,12,Black,,,
0,,23,,"$5,959",201.0,1.0
1,,39,,"$19,500",202.0,2.0


#### The concatenation of cars and car_orders is more natural along columns
We specify the axis=1 argument in order for the concatenation to be performed along the columns. Though this makes more sense than the row concatenation, the orders are not mapped the the corresponding cars since this is a simple concatenation

In [8]:
pd.concat([cars, car_orders], axis = 1)

Unnamed: 0,Car,Colors,CarID,OrderID,Quantity,DownPayment,CarID.1
0,Chevrolet Volt,Red,21,201,1,"$5,959",23
1,Cadillac CTS,Silver,34,202,2,"$19,500",39
2,Jaguar F Type,Yellow,11,203,1,"$33,102",17
3,Hyundai Sonata,Black,23,204,8,"$5,700",9
4,Ford Fiesta,Silver,17,205,5,"$9,201",12
5,Tesla Model S,Red,39,206,10,"$78,950",21
6,Volkswagen GTI,Blue,9,207,3,"$21,310",34
7,Volvo S6,Black,12,208,1,"$11,321",11


### Merge for SQL joins
The merge() function can be used to perform a SQL-like join of dataframes

#### Join the cars and car_orders DataFrames
Pandas is smart enough to notice that the CarID column is common to both DataFrames and joins on that field automatically

In [9]:
pd.merge(cars, car_orders)

Unnamed: 0,Car,Colors,CarID,OrderID,Quantity,DownPayment
0,Chevrolet Volt,Red,21,206,10,"$78,950"
1,Cadillac CTS,Silver,34,207,3,"$21,310"
2,Jaguar F Type,Yellow,11,208,1,"$11,321"
3,Hyundai Sonata,Black,23,201,1,"$5,959"
4,Ford Fiesta,Silver,17,203,1,"$33,102"
5,Tesla Model S,Red,39,202,2,"$19,500"
6,Volkswagen GTI,Blue,9,204,8,"$5,700"
7,Volvo S6,Black,12,205,5,"$9,201"


#### Explicitly state the field to join on
In case the join does not occur automatically, you may need to specify the join field using the "on" argument

In [10]:
pd.merge(cars, car_orders, on = 'CarID')  

Unnamed: 0,Car,Colors,CarID,OrderID,Quantity,DownPayment
0,Chevrolet Volt,Red,21,206,10,"$78,950"
1,Cadillac CTS,Silver,34,207,3,"$21,310"
2,Jaguar F Type,Yellow,11,208,1,"$11,321"
3,Hyundai Sonata,Black,23,201,1,"$5,959"
4,Ford Fiesta,Silver,17,203,1,"$33,102"
5,Tesla Model S,Red,39,202,2,"$19,500"
6,Volkswagen GTI,Blue,9,204,8,"$5,700"
7,Volvo S6,Black,12,205,5,"$9,201"


#### Joining when there isn't a 1-1 match
We extend our cars DataFrame with the two additional cars

In [11]:
extended_cars = pd.concat([cars, more_cars], ignore_index = True)

extended_cars

Unnamed: 0,Car,Colors,CarID
0,Chevrolet Volt,Red,21
1,Cadillac CTS,Silver,34
2,Jaguar F Type,Yellow,11
3,Hyundai Sonata,Black,23
4,Ford Fiesta,Silver,17
5,Tesla Model S,Red,39
6,Volkswagen GTI,Blue,9
7,Volvo S6,Black,12
8,Audi S6,Red,14
9,Porsche Panamera,Silver,5


#### Join the extended cars data with the orders data
The cars without corresonding orders are discarded - the default join is an inner join

In [12]:
pd.merge(extended_cars, car_orders, on = 'CarID')  

Unnamed: 0,Car,Colors,CarID,OrderID,Quantity,DownPayment
0,Chevrolet Volt,Red,21,206,10,"$78,950"
1,Cadillac CTS,Silver,34,207,3,"$21,310"
2,Jaguar F Type,Yellow,11,208,1,"$11,321"
3,Hyundai Sonata,Black,23,201,1,"$5,959"
4,Ford Fiesta,Silver,17,203,1,"$33,102"
5,Tesla Model S,Red,39,202,2,"$19,500"
6,Volkswagen GTI,Blue,9,204,8,"$5,700"
7,Volvo S6,Black,12,205,5,"$9,201"


#### Use the "how" argument to set the type of join
Here, we perform a left join. The options are {‘left’, ‘right’, ‘outer’, ‘inner’} with 'inner' being the default

In [13]:
pd.merge(extended_cars, car_orders, on = 'CarID', how = 'left') 

Unnamed: 0,Car,Colors,CarID,OrderID,Quantity,DownPayment
0,Chevrolet Volt,Red,21,206.0,10.0,"$78,950"
1,Cadillac CTS,Silver,34,207.0,3.0,"$21,310"
2,Jaguar F Type,Yellow,11,208.0,1.0,"$11,321"
3,Hyundai Sonata,Black,23,201.0,1.0,"$5,959"
4,Ford Fiesta,Silver,17,203.0,1.0,"$33,102"
5,Tesla Model S,Red,39,202.0,2.0,"$19,500"
6,Volkswagen GTI,Blue,9,204.0,8.0,"$5,700"
7,Volvo S6,Black,12,205.0,5.0,"$9,201"
8,Audi S6,Red,14,,,
9,Porsche Panamera,Silver,5,,,
