# Pandas

pandas provides labelled data structures. This notebook starts with the `Series`: a one-dimensional ndarray with axis labels.

#### Installation

In [None]:
! pip install pandas



#### Import the library

In [2]:
import pandas as pd
import numpy as np

#### Convert a list to Series

In [None]:
# A plain Python list; pandas assigns the default integer labels 0..3.
my_pets = [
    'Lion',
    'Cat',
    'Birds',
    'Fish',
]
pd.Series(data=my_pets)

0     Lion
1      Cat
2    Birds
3     Fish
dtype: object

#### Add index to series

* The length of the index list must equal the number of values in the Series.

In [None]:
# Labels (days) and values (courses) are paired by position.
my_days = [
    'Monday',
    'Tuesday',
    'Wednesday',
    'Thursday',
    'Friday',
]
my_courses = [
    'Economics',
    'Geography',
    'Finance',
    'Mathametics',
    'History',
]
pd.Series(data=my_courses, index=my_days)

Monday         Economics
Tuesday        Geography
Wednesday        Finance
Thursday     Mathametics
Friday           History
dtype: object

In [None]:
# Both Series share the labels Day1..Day5 so they can be combined label-by-label.
day_labels = [f'Day{n}' for n in range(1, 6)]
days = pd.Series(data=my_days, index=day_labels)
courses = pd.Series(data=my_courses, index=day_labels)

In [None]:
# Label-based lookup of a single value.
courses.loc['Day5']

'History'

In [None]:
# Element-wise string concatenation of the two Series, joined with ':'.
days.str.cat(courses, sep=':')

Day1        Monday:Economics
Day2       Tuesday:Geography
Day3       Wednesday:Finance
Day4    Thursday:Mathametics
Day5          Friday:History
dtype: object

#### Series From Dictionary

In [None]:
# Dictionary keys become the index labels, dictionary values become the data.
sports = dict(Football='Spain', NBA='USA', Cricket='India')
sports_series = pd.Series(data=sports)
sports_series

Football    Spain
NBA           USA
Cricket     India
dtype: object

#### Find value at location by index label

In [None]:
# Scalar access by index label.
sports_series.at['Cricket']

'India'

#### Find Value at location by index number

In [None]:
# Scalar access by integer position (0-based).
sports_series.iat[2]

'India'

#### DataFrame

Two-dimensional, size-mutable, potentially heterogeneous tabular data

#### Create a dataframe from random numbers array

In [None]:
# Draw from a seeded local generator so that "Restart & Run All" always
# produces the same 10x5 table of standard-normal values.
rng = np.random.default_rng(42)
row_labels = [f'row{i}' for i in range(1, 11)]          # row1 .. row10
column_labels = [f'column{i}' for i in range(1, 6)]     # column1 .. column5
my_dataframe = pd.DataFrame(
    rng.standard_normal((10, 5)),
    index=row_labels,
    columns=column_labels,
)
my_dataframe

Unnamed: 0,column1,column2,column3,column4,column5
row1,0.259403,-0.265134,0.842621,0.496901,-0.31872
row2,-0.06915,1.052855,0.022297,-0.278077,1.508068
row3,0.235888,0.75411,-0.92463,1.232044,-0.676358
row4,1.86104,2.096983,-0.434278,-0.952901,0.156523
row5,1.074371,1.519851,-0.180737,0.736275,0.94682
row6,-1.73897,-0.077124,-0.420046,-0.603975,0.271219
row7,0.085268,0.707791,0.81096,-0.457449,0.643091
row8,-0.091393,-0.403601,0.425256,0.542969,-0.571685
row9,0.491791,-0.055839,-0.469492,0.099566,0.050785
row10,0.782362,1.023119,-0.559921,0.18975,-0.508684


#### Selecting a single column

In [None]:
# All rows of one column; the result is a Series named after the column.
my_dataframe.loc[:, 'column1']

row1     0.259403
row2    -0.069150
row3     0.235888
row4     1.861040
row5     1.074371
row6    -1.738970
row7     0.085268
row8    -0.091393
row9     0.491791
row10    0.782362
Name: column1, dtype: float64

#### Add a new column

In [None]:
# Assigning to a new key appends a column; here it is column1 doubled.
my_dataframe['column6'] = my_dataframe['column1'].mul(2)
my_dataframe

Unnamed: 0,column1,column2,column3,column4,column5,column6
row1,0.259403,-0.265134,0.842621,0.496901,-0.31872,0.518806
row2,-0.06915,1.052855,0.022297,-0.278077,1.508068,-0.1383
row3,0.235888,0.75411,-0.92463,1.232044,-0.676358,0.471775
row4,1.86104,2.096983,-0.434278,-0.952901,0.156523,3.722079
row5,1.074371,1.519851,-0.180737,0.736275,0.94682,2.148742
row6,-1.73897,-0.077124,-0.420046,-0.603975,0.271219,-3.47794
row7,0.085268,0.707791,0.81096,-0.457449,0.643091,0.170535
row8,-0.091393,-0.403601,0.425256,0.542969,-0.571685,-0.182786
row9,0.491791,-0.055839,-0.469492,0.099566,0.050785,0.983583
row10,0.782362,1.023119,-0.559921,0.18975,-0.508684,1.564724


#### Drop a column

In [None]:
# Returns a new frame without 'column4'; my_dataframe itself is not modified.
my_dataframe.drop(columns='column4')

Unnamed: 0,column1,column2,column3,column5,column6
row1,0.259403,-0.265134,0.842621,-0.31872,0.518806
row2,-0.06915,1.052855,0.022297,1.508068,-0.1383
row3,0.235888,0.75411,-0.92463,-0.676358,0.471775
row4,1.86104,2.096983,-0.434278,0.156523,3.722079
row5,1.074371,1.519851,-0.180737,0.94682,2.148742
row6,-1.73897,-0.077124,-0.420046,0.271219,-3.47794
row7,0.085268,0.707791,0.81096,0.643091,0.170535
row8,-0.091393,-0.403601,0.425256,-0.571685,-0.182786
row9,0.491791,-0.055839,-0.469492,0.050785,0.983583
row10,0.782362,1.023119,-0.559921,-0.508684,1.564724


In [None]:
# Display the frame again: 'column4' is still present because the drop() call
# without inplace=True returned a new frame.
my_dataframe

Unnamed: 0,column1,column2,column3,column4,column5,column6
row1,0.259403,-0.265134,0.842621,0.496901,-0.31872,0.518806
row2,-0.06915,1.052855,0.022297,-0.278077,1.508068,-0.1383
row3,0.235888,0.75411,-0.92463,1.232044,-0.676358,0.471775
row4,1.86104,2.096983,-0.434278,-0.952901,0.156523,3.722079
row5,1.074371,1.519851,-0.180737,0.736275,0.94682,2.148742
row6,-1.73897,-0.077124,-0.420046,-0.603975,0.271219,-3.47794
row7,0.085268,0.707791,0.81096,-0.457449,0.643091,0.170535
row8,-0.091393,-0.403601,0.425256,0.542969,-0.571685,-0.182786
row9,0.491791,-0.055839,-0.469492,0.099566,0.050785,0.983583
row10,0.782362,1.023119,-0.559921,0.18975,-0.508684,1.564724


#### Drop from original data

In [None]:
# Rebind the name to the result instead of mutating with inplace=True.
# errors='ignore' keeps this cell re-runnable once 'column4' is already gone
# (a second run would otherwise raise KeyError).
my_dataframe = my_dataframe.drop(columns='column4', errors='ignore')
my_dataframe

Unnamed: 0,column1,column2,column3,column5,column6
row1,0.259403,-0.265134,0.842621,-0.31872,0.518806
row2,-0.06915,1.052855,0.022297,1.508068,-0.1383
row3,0.235888,0.75411,-0.92463,-0.676358,0.471775
row4,1.86104,2.096983,-0.434278,0.156523,3.722079
row5,1.074371,1.519851,-0.180737,0.94682,2.148742
row6,-1.73897,-0.077124,-0.420046,0.271219,-3.47794
row7,0.085268,0.707791,0.81096,0.643091,0.170535
row8,-0.091393,-0.403601,0.425256,-0.571685,-0.182786
row9,0.491791,-0.055839,-0.469492,0.050785,0.983583
row10,0.782362,1.023119,-0.559921,-0.508684,1.564724


#### Use index to access row

In [None]:
# Second row (position 1), all columns, returned as a Series.
my_dataframe.iloc[1, :]

column1   -0.069150
column2    1.052855
column3    0.022297
column5    1.508068
column6   -0.138300
Name: row2, dtype: float64

#### Find a value using row and column labels

In [None]:
# Single cell addressed by (row label, column label).
my_dataframe.at['row7', 'column2']

0.7077908214939704

In [None]:
# Row labels of the frame.
my_dataframe.index

Index(['row1', 'row2', 'row3', 'row4', 'row5', 'row6', 'row7', 'row8', 'row9',
       'row10'],
      dtype='object')

#### Add a list of values to a DataFrame as a new column

In [None]:
# One value per row (sp1 .. sp10); the list length must match the row count.
my_dataframe['spin'] = [f'sp{n}' for n in range(1, 11)]
my_dataframe

Unnamed: 0,column1,column2,column3,column5,column6,spin
row1,0.259403,-0.265134,0.842621,-0.31872,0.518806,sp1
row2,-0.06915,1.052855,0.022297,1.508068,-0.1383,sp2
row3,0.235888,0.75411,-0.92463,-0.676358,0.471775,sp3
row4,1.86104,2.096983,-0.434278,0.156523,3.722079,sp4
row5,1.074371,1.519851,-0.180737,0.94682,2.148742,sp5
row6,-1.73897,-0.077124,-0.420046,0.271219,-3.47794,sp6
row7,0.085268,0.707791,0.81096,0.643091,0.170535,sp7
row8,-0.091393,-0.403601,0.425256,-0.571685,-0.182786,sp8
row9,0.491791,-0.055839,-0.469492,0.050785,0.983583,sp9
row10,0.782362,1.023119,-0.559921,-0.508684,1.564724,sp10


#### Rename the columns

In [None]:

# Old-name -> new-name mapping; columns not listed (e.g. 'spin') keep their name.
my_dataframe.rename(
    columns=dict(
        column1='first',
        column2='second',
        column3='third',
        column5='fifth',
        column6='sixth',
    ),
    inplace=True,
)
my_dataframe

Unnamed: 0,first,second,third,fifth,sixth,spin
row1,0.259403,-0.265134,0.842621,-0.31872,0.518806,sp1
row2,-0.06915,1.052855,0.022297,1.508068,-0.1383,sp2
row3,0.235888,0.75411,-0.92463,-0.676358,0.471775,sp3
row4,1.86104,2.096983,-0.434278,0.156523,3.722079,sp4
row5,1.074371,1.519851,-0.180737,0.94682,2.148742,sp5
row6,-1.73897,-0.077124,-0.420046,0.271219,-3.47794,sp6
row7,0.085268,0.707791,0.81096,0.643091,0.170535,sp7
row8,-0.091393,-0.403601,0.425256,-0.571685,-0.182786,sp8
row9,0.491791,-0.055839,-0.469492,0.050785,0.983583,sp9
row10,0.782362,1.023119,-0.559921,-0.508684,1.564724,sp10


#### Read a csv file

In [None]:
# Load the automobile dataset; the path is relative to the notebook's working directory.
data = pd.read_csv(filepath_or_buffer='Automobile.csv')
data

Unnamed: 0,symboling,normalized_losses,make,fuel_type,aspiration,number_of_doors,body_style,drive_wheels,engine_location,wheel_base,length,width,height,curb_weight,engine_type,number_of_cylinders,engine_size,fuel_system,bore,stroke,compression_ratio,horsepower,peak_rpm,city_mpg,highway_mpg,price
0,3,168,alfa-romero,gas,std,two,convertible,rwd,front,88.6,168.8,64.1,48.8,2548,dohc,four,130,mpfi,3.47,2.68,9.0,111,5000,21,27,13495
1,3,168,alfa-romero,gas,std,two,convertible,rwd,front,88.6,168.8,64.1,48.8,2548,dohc,four,130,mpfi,3.47,2.68,9.0,111,5000,21,27,16500
2,1,168,alfa-romero,gas,std,two,hatchback,rwd,front,94.5,171.2,65.5,52.4,2823,ohcv,six,152,mpfi,2.68,3.47,9.0,154,5000,19,26,16500
3,2,164,audi,gas,std,four,sedan,fwd,front,99.8,176.6,66.2,54.3,2337,ohc,four,109,mpfi,3.19,3.40,10.0,102,5500,24,30,13950
4,2,164,audi,gas,std,four,sedan,4wd,front,99.4,176.6,66.4,54.3,2824,ohc,five,136,mpfi,3.19,3.40,8.0,115,5500,18,22,17450
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
196,-1,95,volvo,gas,std,four,sedan,rwd,front,109.1,188.8,68.9,55.5,2952,ohc,four,141,mpfi,3.78,3.15,9.5,114,5400,23,28,16845
197,-1,95,volvo,gas,turbo,four,sedan,rwd,front,109.1,188.8,68.8,55.5,3049,ohc,four,141,mpfi,3.78,3.15,8.7,160,5300,19,25,19045
198,-1,95,volvo,gas,std,four,sedan,rwd,front,109.1,188.8,68.9,55.5,3012,ohcv,six,173,mpfi,3.58,2.87,8.8,134,5500,18,23,21485
199,-1,95,volvo,diesel,turbo,four,sedan,rwd,front,109.1,188.8,68.9,55.5,3217,ohc,six,145,idi,3.01,3.40,23.0,106,4800,26,27,22470


#### Description of data inside DataFrame

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
data.describe()

Unnamed: 0,symboling,normalized_losses,wheel_base,length,width,height,curb_weight,engine_size,bore,stroke,compression_ratio,horsepower,peak_rpm,city_mpg,highway_mpg,price
count,201.0,201.0,201.0,201.0,201.0,201.0,201.0,201.0,201.0,201.0,201.0,201.0,201.0,201.0,201.0,201.0
mean,0.840796,125.189055,98.797015,174.200995,65.889055,53.766667,2555.666667,126.875622,3.329701,3.261741,10.164279,103.263682,5121.393035,25.179104,30.686567,13207.129353
std,1.254802,33.572966,6.066366,12.322175,2.101471,2.447822,517.296727,41.546834,0.268166,0.317875,4.004965,37.389372,479.624905,6.42322,6.81515,7947.066342
min,-2.0,65.0,86.6,141.1,60.3,47.8,1488.0,61.0,2.54,2.07,7.0,48.0,4150.0,13.0,16.0,5118.0
25%,0.0,101.0,94.5,166.8,64.1,52.0,2169.0,98.0,3.15,3.11,8.6,70.0,4800.0,19.0,25.0,7775.0
50%,1.0,122.0,97.0,173.2,65.5,54.1,2414.0,120.0,3.31,3.29,9.0,95.0,5200.0,24.0,30.0,10295.0
75%,2.0,150.0,102.4,183.5,66.6,55.5,2926.0,141.0,3.58,3.46,9.4,116.0,5500.0,30.0,34.0,16500.0
max,3.0,256.0,120.9,208.1,72.0,59.8,4066.0,326.0,3.94,4.17,23.0,262.0,6600.0,49.0,54.0,45400.0


#### Information on DataFrame

`info()` reports each column's inferred data type and non-null count.

In [None]:
# Prints the index range plus each column's non-null count and dtype.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 201 entries, 0 to 200
Data columns (total 26 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   symboling            201 non-null    int64  
 1   normalized_losses    201 non-null    int64  
 2   make                 201 non-null    object 
 3   fuel_type            201 non-null    object 
 4   aspiration           201 non-null    object 
 5   number_of_doors      201 non-null    object 
 6   body_style           201 non-null    object 
 7   drive_wheels         201 non-null    object 
 8   engine_location      201 non-null    object 
 9   wheel_base           201 non-null    float64
 10  length               201 non-null    float64
 11  width                201 non-null    float64
 12  height               201 non-null    float64
 13  curb_weight          201 non-null    int64  
 14  engine_type          201 non-null    object 
 15  number_of_cylinders  201 non-null    obj

#### Selecting a column

In [None]:
# All rows of the 'make' column as a Series.
data.loc[:, 'make']

0      alfa-romero
1      alfa-romero
2      alfa-romero
3             audi
4             audi
          ...     
196          volvo
197          volvo
198          volvo
199          volvo
200          volvo
Name: make, Length: 201, dtype: object

#### Indexes on dataset

In [None]:
# Row labels of the loaded dataset (a RangeIndex, as no index column was specified).
data.index

RangeIndex(start=0, stop=201, step=1)

#### Find Null Values

In [None]:
# Boolean mask of the same shape: True where a value is missing.
data.isna()

Unnamed: 0,symboling,normalized_losses,make,fuel_type,aspiration,number_of_doors,body_style,drive_wheels,engine_location,wheel_base,length,width,height,curb_weight,engine_type,number_of_cylinders,engine_size,fuel_system,bore,stroke,compression_ratio,horsepower,peak_rpm,city_mpg,highway_mpg,price
0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
196,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
197,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
198,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
199,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False


In [None]:
# Summing the boolean mask down the rows gives the missing-value count per column.
data.isna().sum(axis=0)

symboling              0
normalized_losses      0
make                   0
fuel_type              0
aspiration             0
number_of_doors        0
body_style             0
drive_wheels           0
engine_location        0
wheel_base             0
length                 0
width                  0
height                 0
curb_weight            0
engine_type            0
number_of_cylinders    0
engine_size            0
fuel_system            0
bore                   0
stroke                 0
compression_ratio      0
horsepower             0
peak_rpm               0
city_mpg               0
highway_mpg            0
price                  0
dtype: int64

#### Find the percentile (quartile) values for DataFrame

In [None]:
# Transposed summary: one row per numeric column, one column per statistic.
data.describe().transpose()


Unnamed: 0,count,mean,std,min,25%,50%,75%,max
symboling,201.0,0.840796,1.254802,-2.0,0.0,1.0,2.0,3.0
normalized_losses,201.0,125.189055,33.572966,65.0,101.0,122.0,150.0,256.0
wheel_base,201.0,98.797015,6.066366,86.6,94.5,97.0,102.4,120.9
length,201.0,174.200995,12.322175,141.1,166.8,173.2,183.5,208.1
width,201.0,65.889055,2.101471,60.3,64.1,65.5,66.6,72.0
height,201.0,53.766667,2.447822,47.8,52.0,54.1,55.5,59.8
curb_weight,201.0,2555.666667,517.296727,1488.0,2169.0,2414.0,2926.0,4066.0
engine_size,201.0,126.875622,41.546834,61.0,98.0,120.0,141.0,326.0
bore,201.0,3.329701,0.268166,2.54,3.15,3.31,3.58,3.94
stroke,201.0,3.261741,0.317875,2.07,3.11,3.29,3.46,4.17


#### Conditional query and display of selected columns

In [None]:
# Row filter and column selection in a single .loc call.
data.loc[data['price'].gt(20000), ['make', 'price']]

Unnamed: 0,make,price
8,audi,23875
11,bmw,20970
12,bmw,21105
13,bmw,24565
14,bmw,30760
15,bmw,41315
16,bmw,36880
44,jaguar,32250
45,jaguar,35550
46,jaguar,36000


#### Multiple conditions for filtering

In [None]:
# Combine element-wise conditions with & (logical AND): volvo rows priced under 50000.
data.loc[data['make'].eq('volvo') & data['price'].lt(50000)]

Unnamed: 0,symboling,normalized_losses,make,fuel_type,aspiration,number_of_doors,body_style,drive_wheels,engine_location,wheel_base,length,width,height,curb_weight,engine_type,number_of_cylinders,engine_size,fuel_system,bore,stroke,compression_ratio,horsepower,peak_rpm,city_mpg,highway_mpg,price
190,-2,103,volvo,gas,std,four,sedan,rwd,front,104.3,188.8,67.2,56.2,2912,ohc,four,141,mpfi,3.78,3.15,9.5,114,5400,23,28,12940
191,-1,74,volvo,gas,std,four,wagon,rwd,front,104.3,188.8,67.2,57.5,3034,ohc,four,141,mpfi,3.78,3.15,9.5,114,5400,23,28,13415
192,-2,103,volvo,gas,std,four,sedan,rwd,front,104.3,188.8,67.2,56.2,2935,ohc,four,141,mpfi,3.78,3.15,9.5,114,5400,24,28,15985
193,-1,74,volvo,gas,std,four,wagon,rwd,front,104.3,188.8,67.2,57.5,3042,ohc,four,141,mpfi,3.78,3.15,9.5,114,5400,24,28,16515
194,-2,103,volvo,gas,turbo,four,sedan,rwd,front,104.3,188.8,67.2,56.2,3045,ohc,four,130,mpfi,3.62,3.15,7.5,162,5100,17,22,18420
195,-1,74,volvo,gas,turbo,four,wagon,rwd,front,104.3,188.8,67.2,57.5,3157,ohc,four,130,mpfi,3.62,3.15,7.5,162,5100,17,22,18950
196,-1,95,volvo,gas,std,four,sedan,rwd,front,109.1,188.8,68.9,55.5,2952,ohc,four,141,mpfi,3.78,3.15,9.5,114,5400,23,28,16845
197,-1,95,volvo,gas,turbo,four,sedan,rwd,front,109.1,188.8,68.8,55.5,3049,ohc,four,141,mpfi,3.78,3.15,8.7,160,5300,19,25,19045
198,-1,95,volvo,gas,std,four,sedan,rwd,front,109.1,188.8,68.9,55.5,3012,ohcv,six,173,mpfi,3.58,2.87,8.8,134,5500,18,23,21485
199,-1,95,volvo,diesel,turbo,four,sedan,rwd,front,109.1,188.8,68.9,55.5,3217,ohc,six,145,idi,3.01,3.4,23.0,106,4800,26,27,22470


In [None]:
# Same filter as before, keeping only the 'make' and 'price' columns.
data.loc[
    data['make'].eq('volvo') & data['price'].lt(50000),
    ['make', 'price'],
]

Unnamed: 0,make,price
190,volvo,12940
191,volvo,13415
192,volvo,15985
193,volvo,16515
194,volvo,18420
195,volvo,18950
196,volvo,16845
197,volvo,19045
198,volvo,21485
199,volvo,22470


#### Dealing with null values in data

In [None]:
# Small frame with missing values (np.nan) in the two numeric columns.
df = pd.DataFrame(
    data=dict(
        value1=[100, np.nan, 234, np.nan],
        value2=[300, 121, np.nan, np.nan],
        value3=['XUI', 'VYU', 'NMA', 'IUY'],
    )
)
df.head(n=5)

Unnamed: 0,value1,value2,value3
0,100.0,300.0,XUI
1,,121.0,VYU
2,234.0,,NMA
3,,,IUY


In [None]:
# Top-level function form; True marks a missing value.
pd.isnull(df)

Unnamed: 0,value1,value2,value3
0,False,False,False
1,True,False,False
2,False,True,False
3,True,True,False


In [None]:
# isna gives the same mask as isnull.
pd.isna(df)

Unnamed: 0,value1,value2,value3
0,False,False,False
1,True,False,False
2,False,True,False
3,True,True,False


In [None]:
# Missing-value count per column.
df.isna().sum(axis=0)

value1    2
value2    2
value3    0
dtype: int64

#### Filling null values with mean

In [None]:
# Fill each numeric column's NaNs with that column's mean.
# numeric_only=True skips the string column 'value3'; without it pandas >= 2.0
# raises TypeError when it tries to average strings.
# The result is rebound to df rather than mutating with inplace=True.
df = df.fillna(df.mean(numeric_only=True))
df

Unnamed: 0,value1,value2,value3
0,100.0,300.0,XUI
1,167.0,121.0,VYU
2,234.0,210.5,NMA
3,167.0,210.5,IUY


#### Sorting

In [None]:
# Ascending order is the default; returns a sorted copy.
df.sort_values('value2')

Unnamed: 0,value1,value2,value3
1,167.0,121.0,VYU
2,234.0,210.5,NMA
3,167.0,210.5,IUY
0,100.0,300.0,XUI


In [None]:
# Five most expensive cars: sort by price descending, then keep the first 5 rows.
data.sort_values('price', ascending=False).head(5)

Unnamed: 0,symboling,normalized_losses,make,fuel_type,aspiration,number_of_doors,body_style,drive_wheels,engine_location,wheel_base,length,width,height,curb_weight,engine_type,number_of_cylinders,engine_size,fuel_system,bore,stroke,compression_ratio,horsepower,peak_rpm,city_mpg,highway_mpg,price
71,1,140,mercedes-benz,gas,std,two,hardtop,rwd,front,112.0,199.2,72.0,55.4,3715,ohcv,eight,304,mpfi,3.8,3.35,8.0,184,4500,14,16,45400
15,0,149,bmw,gas,std,two,sedan,rwd,front,103.5,193.8,67.9,53.7,3380,ohc,six,209,mpfi,3.62,3.39,8.0,182,5400,16,22,41315
70,0,140,mercedes-benz,gas,std,four,sedan,rwd,front,120.9,208.1,71.7,56.7,3900,ohcv,eight,308,mpfi,3.8,3.35,8.0,184,4500,14,16,40960
125,3,128,porsche,gas,std,two,convertible,rwd,rear,89.5,168.9,65.0,51.6,2800,ohcf,six,194,mpfi,3.74,2.9,9.5,207,5900,17,25,37028
16,0,149,bmw,gas,std,four,sedan,rwd,front,110.0,197.0,70.9,56.3,3505,ohc,six,209,mpfi,3.62,3.39,8.0,182,5400,15,20,36880


#### Group By and Count

In [None]:
# Number of rows per manufacturer, most frequent first.
data.loc[:, 'make'].value_counts()

toyota           32
nissan           18
mazda            17
honda            13
mitsubishi       13
subaru           12
volkswagen       12
peugot           11
volvo            11
dodge             9
bmw               8
mercedes-benz     8
plymouth          7
saab              6
audi              6
porsche           4
alfa-romero       3
chevrolet         3
jaguar            3
isuzu             2
renault           2
mercury           1
Name: make, dtype: int64

#### Concatenate dataframes

In [None]:
# Column-oriented dict: each key is a column name, each list holds that column's values.
mm = dict(
    one=[2, 3, 1, 4, 5],
    two=[5, 4, 3, 2, 1],
    letter=['a', 'a', 'b', 'b', 'c'],
)
mm1 = pd.DataFrame(data=mm)
mm1

Unnamed: 0,one,two,letter
0,2,5,a
1,3,4,a
2,1,3,b
3,4,2,b
4,5,1,c


In [None]:
# Show df again (missing values already filled) before concatenating it with mm1.
df

Unnamed: 0,value1,value2,value3
0,100.0,300.0,XUI
1,167.0,121.0,VYU
2,234.0,210.5,NMA
3,167.0,210.5,IUY


In [None]:
# Stack the frames vertically. Columns are unioned, and the original row labels
# of both frames are kept, so labels 0-3 appear twice in the result.
new_df = pd.concat(objs=[df, mm1], axis=0)
new_df

Unnamed: 0,value1,value2,value3,one,two,letter
0,100.0,300.0,XUI,,,
1,167.0,121.0,VYU,,,
2,234.0,210.5,NMA,,,
3,167.0,210.5,IUY,,,
0,,,,2.0,5.0,a
1,,,,3.0,4.0,a
2,,,,1.0,3.0,b
3,,,,4.0,2.0,b
4,,,,5.0,1.0,c


In [None]:
# Place the frames side by side, aligned on row label; df has no row 4, so its
# columns are NaN there.
new_df1 = pd.concat(objs=[df, mm1], axis='columns')
new_df1

Unnamed: 0,value1,value2,value3,one,two,letter
0,100.0,300.0,XUI,2,5,a
1,167.0,121.0,VYU,3,4,a
2,234.0,210.5,NMA,1,3,b
3,167.0,210.5,IUY,4,2,b
4,,,,5,1,c


#### Join and merge

In [4]:
# Sales amount per person.
sales = dict(
    Jones=10000,
    Chris=5000,
    Piyush=440,
    Meera=6700,
    Rahul=300,
)
# Region per person; np.nan marks an unknown region. The two dicts only partly
# share keys, which is what the join examples rely on.
region = dict(
    Jones='West',
    Chris=np.nan,
    Piyush='West',
    Meera=np.nan,
    Anthony='East',
    Ellen='South',
    Josh='West',
    Simran='East',
    Oscar='North',
)

In [5]:
# One row per person: dict keys become the row labels, dict values fill the 'sales' column.
sales_df = pd.DataFrame(
    data=list(sales.values()),
    index=list(sales.keys()),
    columns=['sales'],
)
sales_df

Unnamed: 0,sales
Jones,10000
Chris,5000
Piyush,440
Meera,6700
Rahul,300


In [6]:
# One row per person: dict keys become the row labels, dict values fill the 'region' column.
region_df = pd.DataFrame(
    data=list(region.values()),
    index=list(region.keys()),
    columns=['region'],
)
region_df

Unnamed: 0,region
Jones,West
Chris,
Piyush,West
Meera,
Anthony,East
Ellen,South
Josh,West
Simran,East
Oscar,North


#### Left Join

In [7]:
# Left join on the index: keep every row of region_df; sales is NaN where no match exists.
joined_df = region_df.join(other=sales_df, how='left')
joined_df

Unnamed: 0,region,sales
Jones,West,10000.0
Chris,,5000.0
Piyush,West,440.0
Meera,,6700.0
Anthony,East,
Ellen,South,
Josh,West,
Simran,East,
Oscar,North,


#### Right Join

In [8]:
# Right join on the index: keep every row of sales_df; region is NaN where no match exists.
joined_df = region_df.join(other=sales_df, how='right')
joined_df

Unnamed: 0,region,sales
Jones,West,10000
Chris,,5000
Piyush,West,440
Meera,,6700
Rahul,,300


#### Inner Join

In [9]:
# Inner join on the index: keep only the labels present in both frames.
joined_df = region_df.join(other=sales_df, how='inner')
joined_df

Unnamed: 0,region,sales
Jones,West,10000
Chris,,5000
Piyush,West,440
Meera,,6700


#### Outer Join

In [10]:
# Outer join on the index: keep the labels from either frame, filling gaps with NaN.
joined_df = region_df.join(other=sales_df, how='outer')
joined_df

Unnamed: 0,region,sales
Anthony,East,
Chris,,5000.0
Ellen,South,
Jones,West,10000.0
Josh,West,
Meera,,6700.0
Oscar,North,
Piyush,West,440.0
Rahul,,300.0
Simran,East,


In [11]:
# Give both indexes the same name so they can be referenced as a merge key.
region_df.index.name = sales_df.index.name = 'names'

#### Merge

In [12]:
# Left merge keyed on the shared index name 'names'.
region_df.merge(sales_df, how='left', on='names')

Unnamed: 0_level_0,region,sales
names,Unnamed: 1_level_1,Unnamed: 2_level_1
Jones,West,10000.0
Chris,,5000.0
Piyush,West,440.0
Meera,,6700.0
Anthony,East,
Ellen,South,
Josh,West,
Simran,East,
Oscar,North,
