# Data Analysis with Python and Pandas tutorial
# Exercise 3: Vietnam weather data

For this exercise you need the VN-humidity.xlsx and VN-temperature.xlsx files. These are available at:

https://1drv.ms/u/s!AgtH78k0_cuvglZTACkxCrOl0vr0?e=nHd1kK

In [1]:
# import the pandas library
import pandas as pd

In [2]:
# read the VN-temperature.xlsx file into a dataframe (df1);
# the path is relative, so the file must be in the notebook's working directory
df1 = pd.read_excel('VN-temperature.xlsx')

In [3]:
# show the shape of the dataframe as a (rows, columns) tuple
df1.shape

(2880, 5)

In [4]:
# show the first few rows (head) to check the column names and sample values
df1.head()

Unnamed: 0,DIAPHUONG,VUNG,THANG,NAM,NHIETDO
0,Lai Chau,BAC,1,2017,14.8
1,Son La,BAC,1,2017,17.1
2,Tuyen Quang,BAC,1,2017,19.0
3,Ha Noi,BAC,1,2017,19.7
4,Bai Chay,BAC,1,2017,19.2


In [5]:
# translation table: original (Vietnamese, upper-case) column name -> English,
# lower-case column name; shared by the temperature and humidity dataframes
col_mapping = dict(
    DIAPHUONG='city',
    NHIETDO='temperature',
    THANG='month',
    NAM='year',
    VUNG='region',
    DO_AM='humidity',
)
# leave the mapping as the last expression so the notebook displays it for review
col_mapping

{'DIAPHUONG': 'city',
 'NHIETDO': 'temperature',
 'THANG': 'month',
 'NAM': 'year',
 'VUNG': 'region',
 'DO_AM': 'humidity'}

In [6]:
# apply the column mapping to rename the columns
# do the renaming inplace
df1.rename(columns=col_mapping, inplace=True)

In [7]:
# show the first two rows again to confirm the new column names
df1.head(2)

Unnamed: 0,city,region,month,year,temperature
0,Lai Chau,BAC,1,2017,14.8
1,Son La,BAC,1,2017,17.1


In [8]:
# read the VN-humidity.xlsx file into a second dataframe (df2)
df2 = pd.read_excel('VN-humidity.xlsx')
# show its shape as a (rows, columns) tuple
df2.shape

(2880, 5)

In [14]:
# summarise df1: dtype and non-null count per column (a non-null count below
# the number of entries means the column has missing values)
df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2880 entries, 0 to 2879
Data columns (total 5 columns):
city           2880 non-null object
region         2880 non-null object
month          2880 non-null int64
year           2880 non-null int64
temperature    2879 non-null float64
dtypes: float64(1), int64(2), object(2)
memory usage: 112.6+ KB


In [10]:
# count how many rows belong to each region
df1['region'].value_counts()

TRUNG         1152
BAC           1152
NAM            384
TAY NGUYEN     192
Name: region, dtype: int64

In [11]:
# rename the columns of df2 in place, using the same mapping as for df1
df2.rename(columns=col_mapping, inplace=True)

In [12]:
# show the first rows of df2 to confirm the renamed columns
df2.head()

Unnamed: 0,humidity,month,year,city,region
0,83,1,2017,Lai Chau,BAC
1,82,1,2017,Son La,BAC
2,83,1,2017,Tuyen Quang,BAC
3,77,1,2017,Ha Noi,BAC
4,82,1,2017,Bai Chay,BAC


In [15]:
# combine temperature and humidity into one dataframe, matching rows on the
# four key columns the two frames share; how='outer' keeps key combinations
# that occur in only one of the frames.
# validate='one_to_one' makes pandas raise a MergeError if a key combination
# is duplicated in either frame, instead of silently multiplying rows.
df = pd.merge(
    df1,
    df2,
    on=['year', 'month', 'region', 'city'],
    how='outer',
    validate='one_to_one',
)

In [16]:
# summarise the merged dataframe: dtype and non-null count per column
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2880 entries, 0 to 2879
Data columns (total 6 columns):
city           2880 non-null object
region         2880 non-null object
month          2880 non-null int64
year           2880 non-null int64
temperature    2879 non-null float64
humidity       2880 non-null int64
dtypes: float64(1), int64(3), object(2)
memory usage: 157.5+ KB


In [17]:
# show the first rows of the merged dataframe
df.head()

Unnamed: 0,city,region,month,year,temperature,humidity
0,Lai Chau,BAC,1,2017,14.8,83
1,Son La,BAC,1,2017,17.1,82
2,Tuyen Quang,BAC,1,2017,19.0,83
3,Ha Noi,BAC,1,2017,19.7,77
4,Bai Chay,BAC,1,2017,19.2,82


In [18]:
# select every row recorded for February 2011
df.loc[(df['year'] == 2011) & (df['month'] == 2)]

Unnamed: 0,city,region,month,year,temperature,humidity
330,Lai Chau,BAC,2,2011,,99
331,Son La,BAC,2,2011,16.7,80
332,Tuyen Quang,BAC,2,2011,17.6,83
333,Ha Noi,BAC,2,2011,17.7,83
334,Bai Chay,BAC,2,2011,16.4,87
335,Nam Dinh,BAC,2,2011,17.2,86
336,Vinh,TRUNG,2,2011,17.7,88
337,Hue,TRUNG,2,2011,19.4,91
338,Da Nang,TRUNG,2,2011,21.5,81
339,Qui Nhon,TRUNG,2,2011,23.8,78


In [19]:
# list the rows whose temperature value is missing
df.loc[df['temperature'].isna()]

Unnamed: 0,city,region,month,year,temperature,humidity
330,Lai Chau,BAC,2,2011,,99


In [20]:
# set the temperature for Lai Chau (region BAC) in February 2011
# NOTE(review): 20.0 is a hard-coded value; the notebook does not show how it
# was chosen - confirm or replace with a derived estimate
target_row = (
    (df['city'] == 'Lai Chau')
    & (df['region'] == 'BAC')
    & (df['year'] == 2011)
    & (df['month'] == 2)
)
df.loc[target_row, 'temperature'] = 20.0

In [22]:
# move the four key columns into a MultiIndex on both dataframes (in place).
# Not re-runnable as written: once the columns are part of the index, calling
# set_index on them a second time raises a KeyError.
index_cols = ['year', 'month', 'region', 'city']
for frame in (df1, df2):
    frame.set_index(index_cols, inplace=True)

In [23]:
# place df1 and df2 side by side, aligning rows on their shared index.
# NOTE(review): this rebinds df, replacing the merged dataframe built earlier
# with pd.merge; the temperature value assigned on that frame is not carried
# over, because df1 itself was never modified by that assignment.
df = pd.concat([df1, df2], axis='columns')

In [24]:
# preview the re-indexed temperature dataframe; limit the display to the first
# ten rows rather than dumping the whole frame into the notebook output
df1.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,temperature
year,month,region,city,Unnamed: 4_level_1
2017,1,BAC,Lai Chau,14.8
2017,1,BAC,Son La,17.1
2017,1,BAC,Tuyen Quang,19.0
2017,1,BAC,Ha Noi,19.7
2017,1,BAC,Bai Chay,19.2
2017,1,BAC,Nam Dinh,19.2
2017,1,TRUNG,Vinh,19.6
2017,1,TRUNG,Hue,21.4
2017,1,TRUNG,Da Nang,23.1
2017,1,TRUNG,Qui Nhon,24.6
