# Working with Time Series in Pandas

## Imports

In [9]:
import pandas as pd
import seaborn as sns
from vega_datasets import data
data.sf_temps()
from vega_datasets import data
data.seattle_weather()
from vega_datasets import data
data.flights_20k()
from vega_datasets import data
data.iowa_electricity()
print('youre good')

youre good


## Lesson 

In [3]:
# Use to_datetime function from the pandas module to convert an 
# existing string value to a datetime type
pd.to_datetime('Jan 1 1970')

Timestamp('1970-01-01 00:00:00')

In [4]:
# Figure out the format of the date
try:
    pd.to_datetime('Jan:1:1970')
except ValueError as e:
    print('ValueError', e)

ValueError Unknown string format: Jan:1:1970


In [5]:
# Here we can specify a format string to tell pandas explicitly 
# how to convert this date.
pd.to_datetime('Jan:1:1970', format='%b:%d:%Y')

Timestamp('1970-01-01 00:00:00')

In [14]:
# In addition to single strings, pd.to_datetime will work with entire 
# columns at a time.
from vega_datasets import data
df = data.movies()
df.head()


Unnamed: 0,Title,US_Gross,Worldwide_Gross,US_DVD_Sales,Production_Budget,Release_Date,MPAA_Rating,Running_Time_min,Distributor,Source,Major_Genre,Creative_Type,Director,Rotten_Tomatoes_Rating,IMDB_Rating,IMDB_Votes
0,The Land Girls,146083.0,146083.0,,8000000.0,Jun 12 1998,R,,Gramercy,,,,,,6.1,1071.0
1,"First Love, Last Rites",10876.0,10876.0,,300000.0,Aug 07 1998,R,,Strand,,Drama,,,,6.9,207.0
2,I Married a Strange Person,203134.0,203134.0,,250000.0,Aug 28 1998,,,Lionsgate,,Comedy,,,,6.8,865.0
3,Let's Talk About Sex,373615.0,373615.0,,300000.0,Sep 11 1998,,,Fine Line,,Comedy,,,13.0,,
4,Slam,1009819.0,1087521.0,,1000000.0,Oct 09 1998,R,,Trimark,Original Screenplay,Drama,Contemporary Fiction,,62.0,3.4,165.0


In [15]:
print(df.dtypes)

Title                      object
US_Gross                  float64
Worldwide_Gross           float64
US_DVD_Sales              float64
Production_Budget         float64
Release_Date               object
MPAA_Rating                object
Running_Time_min          float64
Distributor                object
Source                     object
Major_Genre                object
Creative_Type              object
Director                   object
Rotten_Tomatoes_Rating    float64
IMDB_Rating               float64
IMDB_Votes                float64
dtype: object


In [16]:
# convert our date column to datetime type
df.Release_Date = pd.to_datetime(df.Release_Date)
df.dtypes

Title                             object
US_Gross                         float64
Worldwide_Gross                  float64
US_DVD_Sales                     float64
Production_Budget                float64
Release_Date              datetime64[ns]
MPAA_Rating                       object
Running_Time_min                 float64
Distributor                       object
Source                            object
Major_Genre                       object
Creative_Type                     object
Director                          object
Rotten_Tomatoes_Rating           float64
IMDB_Rating                      float64
IMDB_Votes                       float64
dtype: object

In [17]:
df.head()

Unnamed: 0,Title,US_Gross,Worldwide_Gross,US_DVD_Sales,Production_Budget,Release_Date,MPAA_Rating,Running_Time_min,Distributor,Source,Major_Genre,Creative_Type,Director,Rotten_Tomatoes_Rating,IMDB_Rating,IMDB_Votes
0,The Land Girls,146083.0,146083.0,,8000000.0,1998-06-12,R,,Gramercy,,,,,,6.1,1071.0
1,"First Love, Last Rites",10876.0,10876.0,,300000.0,1998-08-07,R,,Strand,,Drama,,,,6.9,207.0
2,I Married a Strange Person,203134.0,203134.0,,250000.0,1998-08-28,,,Lionsgate,,Comedy,,,,6.8,865.0
3,Let's Talk About Sex,373615.0,373615.0,,300000.0,1998-09-11,,,Fine Line,,Comedy,,,13.0,,
4,Slam,1009819.0,1087521.0,,1000000.0,1998-10-09,R,,Trimark,Original Screenplay,Drama,Contemporary Fiction,,62.0,3.4,165.0


In [19]:
# In addition to the properties listed above, we can use the strftime 
# method and give date string to format the date in a custom way (more on this later).
df['year'] = df.Release_Date.dt.year
df['month'] = df.Release_Date.dt.month
df['day'] = df.Release_Date.dt.day
df['weekday'] = df.Release_Date.dt.day_name()
df.head()

Unnamed: 0,Title,US_Gross,Worldwide_Gross,US_DVD_Sales,Production_Budget,Release_Date,MPAA_Rating,Running_Time_min,Distributor,Source,Major_Genre,Creative_Type,Director,Rotten_Tomatoes_Rating,IMDB_Rating,IMDB_Votes,year,month,day,weekday
0,The Land Girls,146083.0,146083.0,,8000000.0,1998-06-12,R,,Gramercy,,,,,,6.1,1071.0,1998,6,12,Friday
1,"First Love, Last Rites",10876.0,10876.0,,300000.0,1998-08-07,R,,Strand,,Drama,,,,6.9,207.0,1998,8,7,Friday
2,I Married a Strange Person,203134.0,203134.0,,250000.0,1998-08-28,,,Lionsgate,,Comedy,,,,6.8,865.0,1998,8,28,Friday
3,Let's Talk About Sex,373615.0,373615.0,,300000.0,1998-09-11,,,Fine Line,,Comedy,,,13.0,,,1998,9,11,Friday
4,Slam,1009819.0,1087521.0,,1000000.0,1998-10-09,R,,Trimark,Original Screenplay,Drama,Contemporary Fiction,,62.0,3.4,165.0,1998,10,9,Friday


In [None]:
# Once we have a date-time column on a dataframe, we can use that column 
# as the index on our dataframe.


In [20]:
df = df.set_index('Release_Date').sort_index()
df

Unnamed: 0_level_0,Title,US_Gross,Worldwide_Gross,US_DVD_Sales,Production_Budget,MPAA_Rating,Running_Time_min,Distributor,Source,Major_Genre,Creative_Type,Director,Rotten_Tomatoes_Rating,IMDB_Rating,IMDB_Votes,year,month,day,weekday
Release_Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
1928-12-31,The Broadway Melody,2800000.0,4358000.0,,379000.0,,,MGM,Original Screenplay,Musical,,,38.0,6.7,2017.0,1928,12,31,Monday
1929-12-31,Hell's Angels,,,,4000000.0,,,,,,,,90.0,7.9,2050.0,1929,12,31,Tuesday
1930-12-31,Mata Hari,900000.0,900000.0,,558000.0,,,MGM,,,,,,2.2,376.0,1930,12,31,Wednesday
1933-12-31,It Happened One Night,2500000.0,2500000.0,,325000.0,,,,,Romantic Comedy,,Frank Capra,97.0,8.3,25074.0,1933,12,31,Sunday
1937-12-31,You Can't Take It With You,4000000.0,4000000.0,,1644000.0,,,,,,,Frank Capra,96.0,8.0,8597.0,1937,12,31,Friday
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2042-11-16,Cat People,4000000.0,8000000.0,,134000.0,,,RKO Radio Pictures,Original Screenplay,Drama,Fantasy,,91.0,5.9,6791.0,2042,11,16,Sunday
2043-12-24,A Guy Named Joe,5363000.0,5363000.0,,2627000.0,,,MGM,Original Screenplay,Drama,,,,6.9,869.0,2043,12,24,Thursday
2044-08-01,Wilson,2000000.0,2000000.0,,5200000.0,,,,,,,,,7.0,451.0,2044,8,1,Monday
2046-11-21,The Best Years of Our Lives,23600000.0,23600000.0,,2100000.0,,,RKO Radio Pictures,Based on Book/Short Story,Drama,,William Wyler,97.0,8.2,17338.0,2046,11,21,Wednesday


In [22]:
# First let's take a look at the start and end of our data
df.index.min(), df.index.max()

(Timestamp('1928-12-31 00:00:00'), Timestamp('2046-12-31 00:00:00'))

In [26]:
# With a datetime index we can access specific subsets of the data
df['1999-01']

Unnamed: 0_level_0,Title,US_Gross,Worldwide_Gross,US_DVD_Sales,Production_Budget,MPAA_Rating,Running_Time_min,Distributor,Source,Major_Genre,Creative_Type,Director,Rotten_Tomatoes_Rating,IMDB_Rating,IMDB_Votes,year,month,day,weekday
Release_Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
1999-01-08,Sticky Fingers of Time,18195.0,20628.0,,250000.0,,,Strand,,Thriller/Suspense,,,,,,1999,1,8,Friday
1999-01-15,In Dreams,12017369.0,12017369.0,,30000000.0,R,,Dreamworks SKG,Based on Book/Short Story,Horror,Contemporary Fiction,Neil Jordan,22.0,5.3,7138.0,1999,1,15,Friday
1999-01-15,Virus,14010690.0,30626690.0,,75000000.0,R,,Universal,Based on Comic/Graphic Novel,Action,Science Fiction,,9.0,4.5,10487.0,1999,1,15,Friday
1999-01-15,Mississippi Mermaid,24551.0,2624551.0,,1600000.0,,,MGM,,,,,,,,1999,1,15,Friday
1999-01-15,Varsity Blues,52894169.0,54294169.0,,16000000.0,R,104.0,Paramount Pictures,Original Screenplay,Drama,Contemporary Fiction,Brian Robbins,39.0,6.0,18066.0,1999,1,15,Friday
1999-01-15,At First Sight,22365133.0,22365133.0,,40000000.0,PG-13,128.0,MGM,Original Screenplay,Drama,Contemporary Fiction,,33.0,5.6,6872.0,1999,1,15,Friday
1999-01-22,Bacheha-Ye aseman,925402.0,925402.0,,180000.0,PG,,Miramax,Original Screenplay,Comedy,Contemporary Fiction,,,8.0,6657.0,1999,1,22,Friday
1999-01-22,Gloria,4167493.0,4967493.0,,30000000.0,R,,Sony Pictures,Remake,Drama,Contemporary Fiction,Sidney Lumet,19.0,4.7,2726.0,1999,1,22,Friday
1999-01-29,She's All That,63465522.0,63465522.0,,10000000.0,PG-13,91.0,Miramax,Original Screenplay,Romantic Comedy,Contemporary Fiction,,38.0,5.4,28498.0,1999,1,29,Friday


In [27]:
# Note that unlike list slicing syntax, slicing with dates in the example 
# above is inclusive of the upper bound.
df['1999-01-08':'1999-01-16']

Unnamed: 0_level_0,Title,US_Gross,Worldwide_Gross,US_DVD_Sales,Production_Budget,MPAA_Rating,Running_Time_min,Distributor,Source,Major_Genre,Creative_Type,Director,Rotten_Tomatoes_Rating,IMDB_Rating,IMDB_Votes,year,month,day,weekday
Release_Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
1999-01-08,Sticky Fingers of Time,18195.0,20628.0,,250000.0,,,Strand,,Thriller/Suspense,,,,,,1999,1,8,Friday
1999-01-15,In Dreams,12017369.0,12017369.0,,30000000.0,R,,Dreamworks SKG,Based on Book/Short Story,Horror,Contemporary Fiction,Neil Jordan,22.0,5.3,7138.0,1999,1,15,Friday
1999-01-15,Virus,14010690.0,30626690.0,,75000000.0,R,,Universal,Based on Comic/Graphic Novel,Action,Science Fiction,,9.0,4.5,10487.0,1999,1,15,Friday
1999-01-15,Mississippi Mermaid,24551.0,2624551.0,,1600000.0,,,MGM,,,,,,,,1999,1,15,Friday
1999-01-15,Varsity Blues,52894169.0,54294169.0,,16000000.0,R,104.0,Paramount Pictures,Original Screenplay,Drama,Contemporary Fiction,Brian Robbins,39.0,6.0,18066.0,1999,1,15,Friday
1999-01-15,At First Sight,22365133.0,22365133.0,,40000000.0,PG-13,128.0,MGM,Original Screenplay,Drama,Contemporary Fiction,,33.0,5.6,6872.0,1999,1,15,Friday
