In [23]:
import pandas as pd

In [24]:
long_df = pd.read_csv('datasets/long_data.csv', usecols=['date', 'datatype', 'value']).rename(
    columns={'value':'temp_C'}
    ).assign(
    date = lambda x: pd.to_datetime(x.date),
    # value is temp in C, transform into F
    temp_F = lambda x: (x.temp_C * 9/5) + 32
)

In [25]:
# for every day 2 values TMAX, TMIN, TOBS
long_df.head()

Unnamed: 0,datatype,date,temp_C,temp_F
0,TMAX,2018-10-01,21.1,69.98
1,TMIN,2018-10-01,8.9,48.02
2,TOBS,2018-10-01,13.9,57.02
3,TMAX,2018-10-02,23.9,75.02
4,TMIN,2018-10-02,13.9,57.02


### Pivot 
- `pivot(index=..., columns=..., values=...)`, or
- `pivot_table(index=..., columns=..., values=...)`

`index` - index, `columns` - columns to make `wide`, `values` - values to put in
switch to wide format

In [26]:
long_df.pivot_table(index='date', columns='datatype', values='temp_C').head()

datatype,TMAX,TMIN,TOBS
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2018-10-01,21.1,8.9,13.9
2018-10-02,23.9,13.9,17.2
2018-10-03,25.0,15.6,16.1
2018-10-04,22.8,11.7,11.7
2018-10-05,23.3,11.7,18.9


In [27]:
long_df.pivot(index='date', columns='datatype', values='temp_C').head()

datatype,TMAX,TMIN,TOBS
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2018-10-01,21.1,8.9,13.9
2018-10-02,23.9,13.9,17.2
2018-10-03,25.0,15.6,16.1
2018-10-04,22.8,11.7,11.7
2018-10-05,23.3,11.7,18.9


In [28]:
# hierarchical index, to access data first temp_C, then TMIN
long_df.pivot(index='date', columns='datatype', values=['temp_C', 'temp_F']).head()

Unnamed: 0_level_0,temp_C,temp_C,temp_C,temp_F,temp_F,temp_F
datatype,TMAX,TMIN,TOBS,TMAX,TMIN,TOBS
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
2018-10-01,21.1,8.9,13.9,69.98,48.02,57.02
2018-10-02,23.9,13.9,17.2,75.02,57.02,62.96
2018-10-03,25.0,15.6,16.1,77.0,60.08,60.98
2018-10-04,22.8,11.7,11.7,73.04,53.06,53.06
2018-10-05,23.3,11.7,18.9,73.94,53.06,66.02


In [29]:
tmax_f = long_df.pivot(index='date', columns='datatype', values=['temp_C', 'temp_F'])['temp_F']['TMAX']

In [30]:
tmax_f.head()

date
2018-10-01    69.98
2018-10-02    75.02
2018-10-03    77.00
2018-10-04    73.04
2018-10-05    73.94
Name: TMAX, dtype: float64

### MultiIndex
`unstack()` - make a `wide_df` from `long_df` with `MultiIndex`

In [21]:
# index type -> MultiIndex
long_df.set_index(['date', 'datatype']).head().index

MultiIndex([('2018-10-01', 'TMAX'),
            ('2018-10-01', 'TMIN'),
            ('2018-10-01', 'TOBS'),
            ('2018-10-02', 'TMAX'),
            ('2018-10-02', 'TMIN')],
           names=['date', 'datatype'])

In [31]:
# date - outermost level, datatype - innermost index
long_df.set_index(['date', 'datatype'])

Unnamed: 0_level_0,Unnamed: 1_level_0,temp_C,temp_F
date,datatype,Unnamed: 2_level_1,Unnamed: 3_level_1
2018-10-01,TMAX,21.1,69.98
2018-10-01,TMIN,8.9,48.02
2018-10-01,TOBS,13.9,57.02
2018-10-02,TMAX,23.9,75.02
2018-10-02,TMIN,13.9,57.02
...,...,...,...
2018-10-30,TMIN,2.2,35.96
2018-10-30,TOBS,5.0,41.00
2018-10-31,TMAX,12.2,53.96
2018-10-31,TMIN,0.0,32.00


In [22]:
long_df.set_index(['date', 'datatype']).unstack().head()

Unnamed: 0_level_0,temp_C,temp_C,temp_C,temp_F,temp_F,temp_F
datatype,TMAX,TMIN,TOBS,TMAX,TMIN,TOBS
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
2018-10-01,21.1,8.9,13.9,69.98,48.02,57.02
2018-10-02,23.9,13.9,17.2,75.02,57.02,62.96
2018-10-03,25.0,15.6,16.1,77.0,60.08,60.98
2018-10-04,22.8,11.7,11.7,73.04,53.06,53.06
2018-10-05,23.3,11.7,18.9,73.94,53.06,66.02


In [41]:
long_df.append([{
    'datatype':'TAVG',
    'date':'2018-10-01',
    'temp_C':10,
    'temp_F':50
}])

  long_df.append([{


Unnamed: 0,datatype,date,temp_C,temp_F
0,TMAX,2018-10-01 00:00:00,21.1,69.98
1,TMIN,2018-10-01 00:00:00,8.9,48.02
2,TOBS,2018-10-01 00:00:00,13.9,57.02
3,TMAX,2018-10-02 00:00:00,23.9,75.02
4,TMIN,2018-10-02 00:00:00,13.9,57.02
...,...,...,...,...
89,TOBS,2018-10-30 00:00:00,5.0,41.00
90,TMAX,2018-10-31 00:00:00,12.2,53.96
91,TMIN,2018-10-31 00:00:00,0.0,32.00
92,TOBS,2018-10-31 00:00:00,0.0,32.00


Remove the warning of `append`. Use `concat`:
1. Create a row to insert as a DataFrame with index=[0] (or another value)
2. `concat([new_row, df.loc[:]]).reset_index(drop=True)`

In [45]:
pd.concat([pd.DataFrame({
    'datatype':'TAVG',
    'date':'2018-10-01',
    'temp_C':10,
    'temp_F':50
}, index=[0]), long_df.loc[:]]).reset_index(drop=True)

Unnamed: 0,datatype,date,temp_C,temp_F
0,TAVG,2018-10-01,10.0,50.00
1,TMAX,2018-10-01 00:00:00,21.1,69.98
2,TMIN,2018-10-01 00:00:00,8.9,48.02
3,TOBS,2018-10-01 00:00:00,13.9,57.02
4,TMAX,2018-10-02 00:00:00,23.9,75.02
...,...,...,...,...
89,TMIN,2018-10-30 00:00:00,2.2,35.96
90,TOBS,2018-10-30 00:00:00,5.0,41.00
91,TMAX,2018-10-31 00:00:00,12.2,53.96
92,TMIN,2018-10-31 00:00:00,0.0,32.00


In [49]:
pd.concat([pd.DataFrame({
    'datatype':'TAVG',
    'date':'2018-10-01',
    'temp_C':10,
    'temp_F':50
}, index=[0]), long_df.loc[:]]).reset_index(drop=True).set_index(['date', 'datatype']).sort_index()

  pd.concat([pd.DataFrame({


Unnamed: 0_level_0,Unnamed: 1_level_0,temp_C,temp_F
date,datatype,Unnamed: 2_level_1,Unnamed: 3_level_1
2018-10-01,TAVG,10.0,50.00
2018-10-01,TMAX,21.1,69.98
2018-10-01,TMIN,8.9,48.02
2018-10-01,TOBS,13.9,57.02
2018-10-02,TMAX,23.9,75.02
...,...,...,...
2018-10-30,TMIN,2.2,35.96
2018-10-30,TOBS,5.0,41.00
2018-10-31,TMAX,12.2,53.96
2018-10-31,TMIN,0.0,32.00
