# DataFrames Manipulations

In [2]:
import pandas as pd

## Weather datasets

In [3]:
eu_weather_df = pd.DataFrame({
  "town": ["Atina", "Oslo", "London"],
  "temp":[35,21,25],
  "rain": [False, False, True ]
})
eu_weather_df

Unnamed: 0,town,temp,rain
0,Atina,35,False
1,Oslo,21,False
2,London,25,True


In [4]:
bg_weather_df = pd.DataFrame({
  "town": ["Sofia", "Sandanski", "Pleven"],
  "temp":[25,32,21],
  "rain": [False, False, True ]
})
bg_weather_df

Unnamed: 0,town,temp,rain
0,Sofia,25,False
1,Sandanski,32,False
2,Pleven,21,True


### Re-arange columns

In [5]:
cols = ["town", "temp", "rain"]

In [6]:
eu_weather_df = eu_weather_df[cols]
eu_weather_df

Unnamed: 0,town,temp,rain
0,Atina,35,False
1,Oslo,21,False
2,London,25,True


In [12]:
bg_weather_df = bg_weather_df[cols]
bg_weather_df

Unnamed: 0,town,temp,rain
0,Sofia,25,False
1,Sandanski,32,False
2,Pleven,21,True


### Add columns

In [13]:
if not "wind" in eu_weather_df.columns:
    eu_weather_df.insert(3,"wind",[1.5, 7.5, 4])
eu_weather_df

Unnamed: 0,town,temp,rain,wind
0,Atina,35,False,1.5
1,Oslo,21,False,7.5
2,London,25,True,4.0


### Add columns from another dataframe

In [16]:
wind_df=pd.DataFrame([3.4, 2,6.5], columns=["wind"])
wind_df

Unnamed: 0,wind
0,3.4
1,2.0
2,6.5


In [21]:
bg_weather_df= pd.concat([bg_weather_df, wind_df],axis=1)
bg_weather_df

Unnamed: 0,town,temp,rain,wind,wind.1
0,Sofia,25,False,3.4,3.4
1,Sandanski,32,False,2.0,2.0
2,Pleven,21,True,6.5,6.5


### Deleting columns

#### in place: with del or pop()

In [22]:
del bg_weather_df["wind"]
bg_weather_df

Unnamed: 0,town,temp,rain
0,Sofia,25,False
1,Sandanski,32,False
2,Pleven,21,True


#### in new dataframe

In [31]:
# we have to specify axes=1, in order to drop columns, not rows
df_tmp = eu_weather_df.drop(["wind","rain"], axis=1)
df_tmp

Unnamed: 0,town,temp
0,Atina,35
1,Oslo,21
2,London,25


In [30]:
eu_weather_df

Unnamed: 0,town,temp,rain,wind
0,Atina,35,False,1.5
1,Oslo,21,False,7.5
2,London,25,True,4.0


### Append new rows

#### with append()

In [36]:
# keep indexes original indexes:
# appended_weather = eu_weather_df.append(bg_weather_df, sort=False)

#auto indexing
appended_weather = eu_weather_df.append(bg_weather_df,ignore_index=True)

appended_weather

Unnamed: 0,rain,temp,town,wind
0,False,35,Atina,1.5
1,False,21,Oslo,7.5
2,True,25,London,4.0
3,False,25,Sofia,
4,False,32,Sandanski,
5,True,21,Pleven,


#### with concat() - for rows, i.e. axis=0

In [47]:
# keep indexes
concat_weather = pd.concat([eu_weather_df, bg_weather_df],sort=False)

# auto indexing:
# concat_weather = pd.concat([eu_weather_df, bg_weather_df], ignore_index=True, sort=False)

# add keys for each DF
concat_weather = pd.concat([eu_weather_df, bg_weather_df],keys=["EU", "BG"],sort=False)

# later, we car retrieve by index location:
# concat_weather.loc["BG"]

concat_weather



Unnamed: 0,Unnamed: 1,town,temp,rain,wind
EU,0,Atina,35,False,1.5
EU,1,Oslo,21,False,7.5
EU,2,London,25,True,4.0
BG,0,Sofia,25,False,
BG,1,Sandanski,32,False,
BG,2,Pleven,21,True,


### Concat dataframe as new columns, i.e. axis=1

In [51]:
print(eu_weather_df)
print(bg_weather_df)
concat_weather_as_columns = pd.concat([eu_weather_df, bg_weather_df], axis=1)
concat_weather_as_columns

     town  temp   rain  wind
0   Atina    35  False   1.5
1    Oslo    21  False   7.5
2  London    25   True   4.0
        town  temp   rain
0      Sofia    25  False
1  Sandanski    32  False
2     Pleven    21   True


Unnamed: 0,town,temp,rain,wind,town.1,temp.1,rain.1
0,Atina,35,False,1.5,Sofia,25,False
1,Oslo,21,False,7.5,Sandanski,32,False
2,London,25,True,4.0,Pleven,21,True


## DataFrame Object summary statistics

### describe()

In [None]:
print(bg_weather_df)
bg_weather_df.describe()

####  count(), max(), min(),mean(), std()

In [None]:
print(bg_weather_df)
print("***count:\n", bg_weather_df.count())
print("***min:\n", bg_weather_df.min())
print("***std:\n", bg_weather_df.std())
