# Dataframe manipulations in pandas

In [3]:
# Nested dictionary: every country name maps to its own dictionary of facts
europe = {
    'spain': dict(capital='madrid', population=46.77),
    'france': dict(capital='paris', population=66.03),
    'germany': dict(capital='berlin', population=80.62),
    'norway': dict(capital='oslo', population=5.084),
}

# Chained lookup: the outer key picks the country, the inner key picks the field
print(europe['france']['capital'])

# Facts for one more country, built as a stand-alone sub-dictionary
data = dict(capital='rome', population=59.83)

# Store that sub-dictionary in europe under the key 'italy'
europe['italy'] = data

# Show the whole dictionary, including the newly added entry
print(europe)

paris
{'spain': {'capital': 'madrid', 'population': 46.77}, 'france': {'capital': 'paris', 'population': 66.03}, 'germany': {'capital': 'berlin', 'population': 80.62}, 'norway': {'capital': 'oslo', 'population': 5.084}, 'italy': {'capital': 'rome', 'population': 59.83}}


We can chain key lookups, e.g. `europe['france']['capital']`, to reach a value stored inside a nested dictionary: the first key selects the country and the second key selects one of its fields.

In [4]:
# Print the full nested dictionary (one sub-dictionary per country)
print(europe)

{'spain': {'capital': 'madrid', 'population': 46.77}, 'france': {'capital': 'paris', 'population': 66.03}, 'germany': {'capital': 'berlin', 'population': 80.62}, 'norway': {'capital': 'oslo', 'population': 5.084}, 'italy': {'capital': 'rome', 'population': 59.83}}


In [3]:
# A single key returns the whole sub-dictionary stored for that country
print(europe['germany'])

{'capital': 'berlin', 'population': 80.62}


In [5]:
# Chained lookup: first the country, then the 'capital' field inside it
print(europe['germany']['capital'])

berlin


In [5]:
# Import pandas as pd
import pandas as pd

# Pre-defined lists: one entry per country, aligned by position
names = ['United States', 'Australia', 'Japan', 'India', 'Russia', 'Morocco', 'Egypt']
dr = [True, False, False, False, True, True, True]
cpc = [809, 731, 588, 18, 200, 70, 45]

# Column name -> list of values; the key order becomes the column order
my_dict = dict(country=names, drives_right=dr, cars_per_cap=cpc)

# Build the DataFrame cars from my_dict (rows keep the default integer index here)
cars = pd.DataFrame(my_dict)

# Short row labels, one per country; this cell only defines them
row_labels = ['US', 'AUS', 'JAP', 'IN', 'RU', 'MOR', 'EG']


In [6]:
cars.index = row_labels 
# Replaces the row index of cars with the labels held in row_labels

In [25]:
# Display the DataFrame as the cell's output
cars

Unnamed: 0,country,drives_right,cars_per_cap
US,United States,True,809
AUS,Australia,False,731
JAP,Japan,False,588
IN,India,False,18
RU,Russia,True,200
MOR,Morocco,True,70
EG,Egypt,True,45


Selecting and slicing the DataFrame with square brackets and with the `loc` and `iloc` indexers



In [38]:
# Selecting with square brackets '[]':
cars['country'] # a single column label returns a pandas Series
cars[['country']] # a list holding one label returns a DataFrame with that single column

cars[['country', 'drives_right']] # a list of labels selects multiple columns

cars[0:2] # an integer slice selects rows by position: here the first 2 rows
cars[3:6] # rows at positions 3, 4 and 5, i.e. the 4th, 5th and 6th observations (only this last expression is displayed)

Unnamed: 0,country,drives_right,cars_per_cap
IN,India,False,18
RU,Russia,True,200
MOR,Morocco,True,70


In [45]:
cars.loc['RU'] # select a row by its label; a single label returns a Series
cars.iloc[4] # the same row selected by its integer position

cars.loc[['RU']]  # wrapping the label in a list returns a DataFrame instead of a Series
cars.iloc[[4]] # positional equivalent; as the last expression, this is what the cell displays

Unnamed: 0,country,drives_right,cars_per_cap
RU,Russia,True,200


In [46]:
# Print out the drives_right entry for Morocco
# (the row selector is the list ['MOR'], so the result is a one-element Series rather than a bare value)
print(cars.loc[['MOR'], 'drives_right'])

# Print sub-DataFrame: rows 'RU' and 'MOR', columns 'country' and 'drives_right'
print(cars.loc[['RU', 'MOR'], ['country', 'drives_right']])

MOR    True
Name: drives_right, dtype: bool
     country  drives_right
RU    Russia          True
MOR  Morocco          True


In [48]:
# Another example: ':' as the row selector keeps all rows, so only columns are chosen.
# Each pair below selects the same data, once by label (loc) and once by position (iloc).
# Column positions are zero-based: country=0, drives_right=1, cars_per_cap=2.

# A single column -> Series
cars.loc[:, 'country']
cars.iloc[:, 0]

# Several columns -> DataFrame (the last expression is what the cell displays)
cars.loc[:, ['drives_right', 'cars_per_cap']]
cars.iloc[:, [1, 2]]

Unnamed: 0,drives_right,cars_per_cap
US,True,809
AUS,False,731
JAP,False,588
IN,False,18
RU,True,200
MOR,True,70
EG,True,45


## Filtering DataFrames by condition, or why we type `df[df[column] == condition]`

In [16]:
# A boolean Series can be used as a row filter: passing it inside the square brackets
# keeps only the rows where the Series is True.

dr = cars['drives_right']  # the boolean column, stored as the Series 'dr'
sel = cars[dr]             # keep only the rows of cars whose value in dr is True
                           # rows where dr is False are dropped from sel


In [20]:
# The same filter in one line: the inner cars['drives_right'] is the boolean Series,
# the outer cars[...] keeps the rows where it is True.
# Note: this rebinds dr to the filtered DataFrame.
dr = cars[cars['drives_right']] # rows of cars where drives_right is True

In [19]:
# Display the filtered rows held in dr
dr

Unnamed: 0,country,drives_right,cars_per_cap
US,United States,True,809
RU,Russia,True,200
MOR,Morocco,True,70
EG,Egypt,True,45


To filter on one condition or on several, we can use NumPy's `logical_or` and `logical_and` functions; they come in handy when multiple conditions have to be combined into a single row filter.

In [22]:
# The comparison yields a boolean Series; indexing with it keeps the rows with cars_per_cap above 500
car_maniac = cars[cars['cars_per_cap']>500]
car_maniac

Unnamed: 0,country,drives_right,cars_per_cap
US,United States,True,809
AUS,Australia,False,731
JAP,Japan,False,588


In [31]:
# Alternative: combine several conditions element-wise with np.logical_and
import numpy as np

per_cap = cars['cars_per_cap']
# NOTE(review): per_cap > 100 is already implied by per_cap > 500, so the result
# equals the single-condition filter; confirm whether a different pair of bounds was intended.
car_maniac = cars[np.logical_and(per_cap > 100, per_cap > 500)]

In [32]:
# Display the rows selected into car_maniac
car_maniac

Unnamed: 0,country,drives_right,cars_per_cap
US,United States,True,809
AUS,Australia,False,731
JAP,Japan,False,588


## While loop and conditions
A `while` loop is extremely helpful when, for instance, we need to keep executing a block of code until a condition is finally met.
Let's look at an example:

In [44]:
# Count upward from 1 and print each value for as long as it stays below 4
x = 1
while x < 4:
    print(x)
    x += 1  # advance the counter so the loop terminates once x reaches 4

1
2
3


In [54]:
# Two plain integers (presumably dates written as YYYYMMDD)
start_date = 20181010
end_date = 20181001

# Decrease start_date by 1 per pass and print it, stopping once it equals end_date.
# NOTE(review): if these are dates, integer subtraction only yields valid
# values while both stay within the same month.
while end_date < start_date:
    start_date -= 1
    print(start_date)

20181009
20181008
20181007
20181006
20181005
20181004
20181003
20181002
20181001


In [55]:
## Another while-loop example: move offset toward zero one step at a time

# Initialize offset
offset = -6

# Keep looping until offset is exactly 0
while offset != 0:
    print("correcting...")
    # Step by one in whichever direction brings offset closer to 0
    offset += -1 if offset > 0 else 1
    print(offset)

correcting...
-5
correcting...
-4
correcting...
-3
correcting...
-2
correcting...
-1
correcting...
0


### Using 'for' loops to iterate over