# Learning Pandas

In [3]:
%matplotlib inline

import numpy as np
import matplotlib.pyplot as plt

# Size tuple meant for the larger matplotlib figures; presumably (width, height) as
# expected by a figsize argument -- it is not used in the cells visible here.
LARGE_FIGSIZE = (12, 8)  # defines size of matplotlib figures

In [4]:
import pandas as pd

# set_option controls many pandas options; here the number of rows shown before a
# DataFrame display is truncated is lowered to 16.
pd.set_option("display.max_rows", 16)   # pandas' documented default for max_rows is 60

Using data from a pandas tutorial

In [24]:
# Machine-specific absolute path: the relative "data/..." paths used by the cells below
# resolve against this directory, so point it at your local copy of the tutorial data.
%cd /media/sf_estudios/pandas/pandas_tutorial/climate_timeseries

/media/sf_estudios/pandas/pandas_tutorial/climate_timeseries


## 01 Reading Files

In [18]:
# using pd.read_table()

The data file has no header row, and its columns are separated by a variable number of spaces.

In [20]:
# Shell escape: print the first lines of the raw file to inspect its layout before parsing.
!head data/temperatures/annual.land_ocean.90S.90N.df_1901-2000mean.dat

1880   -0.1591
1881   -0.0789
1882   -0.1313
1883   -0.1675
1884   -0.2485
1885   -0.2042
1886   -0.1695
1887   -0.2505
1888   -0.1605
1889   -0.1103


In [21]:
# The file has no header row, so the column names are supplied explicitly.
# `sep` is a regular expression (one or more whitespace characters); it is written as a
# raw string because "\s" inside a normal string literal is an invalid escape sequence.
data = pd.read_table("data/temperatures/annual.land_ocean.90S.90N.df_1901-2000mean.dat",
                     sep=r"\s+", names=["year", "mean_temp"])

In [26]:
# No index_col was given on read, so rows get an integer index (0, 1, 2, ...) and
# "year" stays an ordinary column.
data  # has a numeric index

Unnamed: 0,year,mean_temp
0,1880,-0.1591
1,1881,-0.0789
2,1882,-0.1313
3,1883,-0.1675
4,1884,-0.2485
5,1885,-0.2042
6,1886,-0.1695
7,1887,-0.2505
...,...,...
123,2003,0.5818


In [29]:
# Same file as above, but the first column ("year") is used as the row index.
# The regex separator is a raw string: "\s" in a normal string literal is an invalid
# escape sequence and triggers a warning on recent Python versions.
data2 = pd.read_table("data/temperatures/annual.land_ocean.90S.90N.df_1901-2000mean.dat", sep=r"\s+",
                      names=["year", "mean temp"], index_col=0)
data2

Unnamed: 0_level_0,mean temp
year,Unnamed: 1_level_1
1880,-0.1591
1881,-0.0789
1882,-0.1313
1883,-0.1675
1884,-0.2485
1885,-0.2042
1886,-0.1695
1887,-0.2505
...,...
2003,0.5818


### Convert dates on read

In [31]:
# parse_dates=True asks pandas to parse the index column (the years) as dates while
# reading, so the index holds timestamps instead of plain integers.
# The regex separator is a raw string: "\s" in a normal string literal is an invalid
# escape sequence and triggers a warning on recent Python versions.
data2 = pd.read_table("data/temperatures/annual.land_ocean.90S.90N.df_1901-2000mean.dat", sep=r"\s+",
                      names=["year", "mean temp"], index_col=0, parse_dates=True)
data2

Unnamed: 0_level_0,mean temp
year,Unnamed: 1_level_1
1880-01-01 00:00:00,-0.1591
1881-01-01 00:00:00,-0.0789
1882-01-01 00:00:00,-0.1313
1883-01-01 00:00:00,-0.1675
1884-01-01 00:00:00,-0.2485
1885-01-01 00:00:00,-0.2042
1886-01-01 00:00:00,-0.1695
1887-01-01 00:00:00,-0.2505
...,...
2003-01-01,0.5818


In [33]:
# Only the last expression of a cell is echoed, so this first result is evaluated
# but never shown.
type(data2)
# This is the value the cell displays: the type of the index after parsing dates on read.
type(data2.index)

pandas.tseries.index.DatetimeIndex

### Data with text at the top and bottom of the file that I want to skip.
### Without engine="python", the footer-skipping option (skip_footer, spelled skipfooter in newer pandas) cannot be used.

In [25]:
# The file carries free-text lines above and below the table: skip the first 7 lines
# and drop the last 11. Footer skipping requires the Python parser engine.
# `skipfooter` is the supported spelling; the older `skip_footer` alias was deprecated
# and later removed from pandas. The regex separator is a raw string because "\s" in a
# normal string literal is an invalid escape sequence.
# NOTE(review): the displayed output contains "***"/"****" placeholders, so some columns
# are presumably loaded as text -- consider na_values if numeric columns are needed.
giss_temp = pd.read_table("data/temperatures/GLB.Ts+dSST.txt", sep=r"\s+", skiprows=7,
                          skipfooter=11, engine="python")
giss_temp

Unnamed: 0,Year,Jan,Feb,Mar,Apr,May,Jun,Jul,Aug,Sep,Oct,Nov,Dec,J-D,D-N,DJF,MAM,JJA,SON,Year.1
0,1880,-34,-27,-22,-30,-16,-24,-19,-12,-20,-19,-16,-21,-22,***,****,-23,-18,-18,1880
1,1881,-13,-16,-2,-3,-3,-27,-12,-8,-18,-23,-28,-18,-14,-14,-17,-3,-15,-23,1881
2,1882,3,4,-2,-24,-20,-32,-27,-11,-11,-25,-25,-37,-17,-16,-4,-15,-23,-20,1882
3,1883,-38,-38,-12,-20,-20,-8,-3,-13,-19,-19,-28,-21,-20,-21,-38,-18,-8,-22,1883
4,1884,-20,-14,-31,-36,-33,-36,-31,-24,-29,-25,-29,-25,-28,-28,-18,-33,-31,-28,1884
5,1885,-57,-29,-19,-36,-35,-40,-28,-24,-17,-14,-14,0,-26,-28,-37,-30,-31,-15,1885
6,1886,-37,-43,-34,-23,-21,-30,-13,-21,-12,-22,-29,-18,-25,-24,-26,-26,-21,-21,1886
7,1887,-60,-43,-26,-34,-28,-25,-19,-28,-24,-33,-29,-40,-32,-30,-40,-29,-24,-28,1887
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
135,2009,56,48,49,57,59,62,66,61,64,58,72,58,59,59,52,55,63,65,2009


## Read from a URL

In [34]:
# read_table is given a URL here instead of a local path; the file is whitespace-delimited.
# The regex separator is a raw string because "\s" in a normal string literal is an
# invalid escape sequence and triggers a warning on recent Python versions.
# NOTE(review): the displayed output has an empty "#version_2015_rel2" column, which looks
# like a header comment parsed as a column name -- confirm and drop it if unwanted.
url = "http://sealevel.colorado.edu/files/2015_rel2/sl_ns_global.txt"
global_sea_level = pd.read_table(url, sep=r"\s+")
global_sea_level

Unnamed: 0,year,msl_ib_ns(mm),#version_2015_rel2
0,1992.9595,-5.818,
1,1992.9866,-7.525,
2,1993.0138,-9.215,
3,1993.0409,-11.796,
4,1993.0681,-11.291,
5,1993.0952,-9.569,
6,1993.1223,-3.714,
7,1993.1495,-2.471,
...,...,...,...
772,2014.9494,70.723,


In [5]:
# read_html fetches the page and parses its HTML tables; the result is indexed like a
# list below (one entry per table found).
table_list = pd.read_html("http://www.psmsl.org/data/obtaining/")
# Bare expression in the middle of a cell: evaluated but not displayed.
table_list
# Last expression, so this is what the cell shows: the type of the first parsed table.
type(table_list[0])

pandas.core.frame.DataFrame

In [4]:
# Display the first table parsed from the page.
table_list[0]

Unnamed: 0,Station Name,ID,Lat.,Lon.,GLOSS ID,Country,Date,Coastline,Station
0,HELIGMAN,231,60.200,19.300,,ALA,01/01/1980,60,251
1,KOBBAKLINTAR,63,60.033,19.883,,ALA,01/01/1980,60,261
2,LEMSTROM,84,60.100,20.017,,ALA,01/01/1980,60,271
3,FOGLO / DEGERBY,249,60.032,20.385,,ALA,06/10/2015,60,281
4,PAGO PAGO,539,-14.280,-170.690,144.0,ASM,08/03/2016,745,1
5,BAHIA ESPERANZA,988,-63.300,-56.917,185.0,ATA,11/05/1999,999,1
6,ARGENTINE ISLANDS,913,-65.246,-64.257,188.0,ATA,21/05/2015,999,3
7,PUERTO SOBERANIA,1603,-62.483,-59.633,189.0,ATA,30/09/2004,999,5
8,ALMIRANTE BROWN,858,-64.900,-62.867,,ATA,01/01/1980,999,24
9,CAPE ROBERTS ANTARCTICA,1763,-77.034,163.191,,ATA,27/09/2010,999,70
