# DataFrame from Dictionary

In [1]:
dict={
    "country":["Brazil","Russia", "India"],
    "capital":["Brazilia","Moscow", "New Delhi"],
    "area":[8.565,16.444,3.322],
    "population":[200,145.3,1252]
}

import pandas as pd
bricks = pd.DataFrame(dict)

In [2]:
bricks.head(2)

Unnamed: 0,country,capital,area,population
0,Brazil,Brazilia,8.565,200.0
1,Russia,Moscow,16.444,145.3


In [3]:
bricks.index=["BR","RU","IN"]
bricks

Unnamed: 0,country,capital,area,population
BR,Brazil,Brazilia,8.565,200.0
RU,Russia,Moscow,16.444,145.3
IN,India,New Delhi,3.322,1252.0


In [None]:
dont run
bricks=pd.read_csv("path/to/bricks.csv")

In [14]:
bricks["country"]

BR    Brazil
RU    Russia
IN     India
Name: country, dtype: object

In [15]:
type(bricks["country"])

pandas.core.series.Series

In [16]:
type(bricks[["country"]])

pandas.core.frame.DataFrame

In [17]:
#column access
bricks[["country","capital"]]

Unnamed: 0,country,capital
BR,Brazil,Brazilia
RU,Russia,Moscow
IN,India,New Delhi


In [18]:
#row access
bricks[1:4]

Unnamed: 0,country,capital,area,population
RU,Russia,Moscow,16.444,145.3
IN,India,New Delhi,3.322,1252.0


In [19]:
# loc iloc row access
bricks.loc["RU"]

country       Russia
capital       Moscow
area          16.444
population     145.3
Name: RU, dtype: object

In [20]:
bricks.loc[["RU"]]

Unnamed: 0,country,capital,area,population
RU,Russia,Moscow,16.444,145.3


In [21]:
bricks.loc[["RU","IN"]]

Unnamed: 0,country,capital,area,population
RU,Russia,Moscow,16.444,145.3
IN,India,New Delhi,3.322,1252.0


In [22]:
bricks.loc[["RU","IN"],["coutry","capital"]]

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
  """Entry point for launching an IPython kernel.


Unnamed: 0,coutry,capital
RU,,Moscow
IN,,New Delhi


In [23]:
bricks.loc[:, ["country","capital"]]

Unnamed: 0,country,capital
BR,Brazil,Brazilia
RU,Russia,Moscow
IN,India,New Delhi


## difference between loc and iloc


In [24]:
bricks.loc[["RU"]]

Unnamed: 0,country,capital,area,population
RU,Russia,Moscow,16.444,145.3


In [25]:
#Row access iloc
bricks.iloc[[1]]

Unnamed: 0,country,capital,area,population
RU,Russia,Moscow,16.444,145.3


In [26]:
bricks.iloc[[1,2]]

Unnamed: 0,country,capital,area,population
RU,Russia,Moscow,16.444,145.3
IN,India,New Delhi,3.322,1252.0


In [27]:
#Row and column iloc
bricks.loc[["RU","IN"],["country","capital"]]

Unnamed: 0,country,capital
RU,Russia,Moscow
IN,India,New Delhi


In [28]:
bricks.iloc[[1,2],[0,1]]

Unnamed: 0,country,capital
RU,Russia,Moscow
IN,India,New Delhi


In [29]:
bricks.loc[:,["country","capital"]]

Unnamed: 0,country,capital
BR,Brazil,Brazilia
RU,Russia,Moscow
IN,India,New Delhi


In [30]:
bricks.iloc[:,[0,1]]

Unnamed: 0,country,capital
BR,Brazil,Brazilia
RU,Russia,Moscow
IN,India,New Delhi


## Filtering Pandas DataFrame

select countries with area over 8 million km2

In [31]:
#get column
bricks["area"]

BR     8.565
RU    16.444
IN     3.322
Name: area, dtype: float64

In [32]:
#compare
bricks["area"] > 8

BR     True
RU     True
IN    False
Name: area, dtype: bool

In [33]:
is_huge=bricks["area"]>8

In [34]:
is_huge

BR     True
RU     True
IN    False
Name: area, dtype: bool

In [35]:
bricks[is_huge]

Unnamed: 0,country,capital,area,population
BR,Brazil,Brazilia,8.565,200.0
RU,Russia,Moscow,16.444,145.3


In [36]:
bricks[bricks['area'] > 8]

Unnamed: 0,country,capital,area,population
BR,Brazil,Brazilia,8.565,200.0
RU,Russia,Moscow,16.444,145.3


In [37]:
#boolean operators
import numpy as np
np.logical_and(bricks["area"] > 8, bricks["area"] <10)

BR     True
RU    False
IN    False
Name: area, dtype: bool

In [38]:
bricks[np.logical_and(bricks["area"] > 8, bricks["area"] <10)]

Unnamed: 0,country,capital,area,population
BR,Brazil,Brazilia,8.565,200.0


## Loop data structures
## iterrows

In [39]:
bricks

Unnamed: 0,country,capital,area,population
BR,Brazil,Brazilia,8.565,200.0
RU,Russia,Moscow,16.444,145.3
IN,India,New Delhi,3.322,1252.0


In [13]:
#to print column names
for val in bricks:
    print(val)

country
capital
area
population


In [41]:
#iterrows
for lab,row in bricks.iterrows():
    print(lab)
    print(row)

BR
country         Brazil
capital       Brazilia
area             8.565
population         200
Name: BR, dtype: object
RU
country       Russia
capital       Moscow
area          16.444
population     145.3
Name: RU, dtype: object
IN
country           India
capital       New Delhi
area              3.322
population         1252
Name: IN, dtype: object


In [42]:
#selective print
for lab,row in bricks.iterrows():
    print(lab + ":" + row["capital"])

BR:Brazilia
RU:Moscow
IN:New Delhi


In [44]:
##ADD COLUMN
for lab,row in bricks.iterrows():
    bricks.loc[lab, "name_length"]=len(row["country"])
print(bricks)

   country    capital    area  population  name_length
BR  Brazil   Brazilia   8.565       200.0          6.0
RU  Russia     Moscow  16.444       145.3          6.0
IN   India  New Delhi   3.322      1252.0          5.0


In [46]:
#Alternative to the above, will overwrite previous output
##APPLY
bricks["name_length"] = bricks["country"].apply(len)
print(bricks)

   country    capital    area  population  name_length
BR  Brazil   Brazilia   8.565       200.0            6
RU  Russia     Moscow  16.444       145.3            6
IN   India  New Delhi   3.322      1252.0            5


In [47]:
bricks["name_length2"] = bricks["country"].apply(len)
print(bricks)

   country    capital    area  population  name_length  name_length2
BR  Brazil   Brazilia   8.565       200.0            6             6
RU  Russia     Moscow  16.444       145.3            6             6
IN   India  New Delhi   3.322      1252.0            5             5
