In [1]:
# import pandas (pyarrow is not imported directly; pandas uses it for the Feather I/O below)
import pandas as pd
# import pyarrow

In [2]:
# display settings: floats shown with two decimals and thousands separators,
# output wrapped at 68 characters, and at most 3 columns displayed
pd.set_option('display.float_format', '{:,.2f}'.format)
pd.options.display.width = 68
pd.options.display.max_columns = 3

In [3]:
# import the land temperature data; the explicit column names replace the
# file's first row, which skiprows=1 discards (presumably a header row)
landtemps = pd.read_csv('data/landtempssample.csv',
    names=['stationid','year','month','avgtemp','latitude',
      'longitude','elevation','station','countryid','country'],
    skiprows=1,
    low_memory=False)

# build measuredate (first day of each month) from the year and month columns
# and place it first; combining columns via parse_dates=[['month','year']] is
# deprecated in recent pandas releases, so the date is assembled explicitly
landtemps.insert(0, 'measuredate',
    pd.to_datetime(landtemps[['year','month']].assign(day=1)))

# the separate month and year columns are now redundant
landtemps = landtemps.drop(columns=['month','year'])

In [4]:
# discard rows that have no average temperature reading
landtemps = landtemps.dropna(subset=['avgtemp'])

In [5]:
# inspect the column data types (measuredate should be datetime64)
landtemps.dtypes

measuredate    datetime64[ns]
stationid              object
avgtemp               float64
latitude              float64
longitude             float64
elevation             float64
station                object
countryid              object
country                object
dtype: object

In [6]:
# index the rows by measurement date and station id
landtemps = landtemps.set_index(['measuredate','stationid'])

In [7]:
# select the extreme temperature readings -- below the 0.1th percentile or
# above the 99.9th percentile of avgtemp -- for export to Excel and CSV
lowcut = landtemps.avgtemp.quantile(.001)
highcut = landtemps.avgtemp.quantile(.999)
isextreme = (landtemps.avgtemp < lowcut) | (landtemps.avgtemp > highcut)
extremevals = landtemps.loc[isextreme]
extremevals.shape

(171, 7)

In [8]:
# show 7 randomly chosen extreme rows; random_state fixes the draw so the
# displayed sample is the same on every run
extremevals.sample(7, random_state=1)

Unnamed: 0_level_0,Unnamed: 1_level_0,avgtemp,...,country
measuredate,stationid,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1960-12-01,RSM00024641,-37.42,...,Russia
2015-05-01,IN017121000,34.51,...,India
1993-07-01,MUM00041268,35.17,...,Oman
2018-09-01,AYW00090001,-62.4,...,Antarctica
2010-01-01,RSM00024790,-36.46,...,Russia
1984-01-01,RSM00031054,-35.05,...,Russia
1971-06-01,PKXLT416671,35.7,...,Pakistan


In [9]:
# export the extreme values to an Excel workbook
# NOTE(review): assumes the views/ directory already exists -- confirm
extremevals.to_excel('views/tempext.xlsx')

In [10]:
# export the same extreme values to a CSV file
extremevals.to_csv('views/tempext.csv')

In [11]:
# save the full DataFrame to a pickle file while it still carries the
# (measuredate, stationid) index
landtemps.to_pickle('data/landtemps.pkl')

In [12]:
# move measuredate and stationid back from the index into ordinary columns
# ahead of the Feather export, which requires a default index
landtemps = landtemps.reset_index()

In [None]:
# save the DataFrame (index already reset to columns) to a Feather file
landtemps.to_feather("data/landtemps.ftr")

In [None]:
# reload the pickled DataFrame and view its first two rows transposed
landtemps = pd.read_pickle('data/landtemps.pkl')
landtemps.iloc[:2].transpose()

In [None]:
# reload the Feather file and view its first two rows transposed
landtemps = pd.read_feather("data/landtemps.ftr")
landtemps.iloc[:2].transpose()