## Dealing with Dates
Reference for the `datetime` objects used below: https://docs.python.org/3/library/datetime.html#datetime-objects

In [1]:
import datetime
import pandas as pd

# Relative path of the directory that holds the data files.
dir_path = "../../data/ml-latest-small"

# Read the ratings table and preview its first rows.
ratings_csv = f"{dir_path}/ratings.csv"
ratings = pd.read_csv(ratings_csv)
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [6]:
# Epoch-seconds value of the first row in `ratings`.
foo = 964982703

# NOTE: fromtimestamp() without a `tz` argument returns local time, so the
# time-of-day printed below depends on the timezone of the machine running it.
converted = datetime.datetime.fromtimestamp(foo)

# Full ISO timestamp, date only, year, and weekday number (Monday == 0).
for piece in (
    converted.isoformat(),
    converted.date().isoformat(),
    converted.year,
    converted.weekday(),
):
    print(piece)

2000-07-30T13:45:03
2000-07-30
2000
6


In [7]:
# datetime.fromtimestamp() converts one number at a time, so handing it the
# whole `timestamp` Series fails. The TypeError is the point of this cell;
# it is caught and printed so that "Restart & Run All" can continue past it.

try:
    ratings.assign(date=datetime.datetime.fromtimestamp(ratings.timestamp)).head()
except TypeError as err:
    print(f"{type(err).__name__}: {err}")

TypeError: cannot convert the series to <class 'int'>

In [10]:
# Convert each epoch-seconds value to an ISO date string, one element at a
# time, and preview the first rows of the result.
(
    ratings["timestamp"]
    .map(lambda ts: datetime.datetime.fromtimestamp(ts).date().isoformat())
    .head()
)

0    2000-07-30
1    2000-07-30
2    2000-07-30
3    2000-07-30
4    2000-07-30
Name: timestamp, dtype: object

In [11]:
# fromtimestamp() handles a single value, so it is wrapped in a lambda that
# runs once per entry of the `timestamp` column.

ratings["date_full"] = ratings["timestamp"].map(
    lambda ts: datetime.datetime.fromtimestamp(ts).date().isoformat()
)
ratings["date_year"] = ratings["timestamp"].map(
    lambda ts: datetime.datetime.fromtimestamp(ts).year
)
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp,date_full,date_year
0,1,1,4.0,964982703,2000-07-30,2000
1,1,3,4.0,964981247,2000-07-30,2000
2,1,6,4.0,964982224,2000-07-30,2000
3,1,47,5.0,964983815,2000-07-30,2000
4,1,50,5.0,964982931,2000-07-30,2000


In [12]:
# Group the rows by rating year, then count the non-null entries per column.
per_year = ratings.groupby("date_year")
per_year.count()

Unnamed: 0_level_0,userId,movieId,rating,timestamp,date_full
date_year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1996,6040,6040,6040,6040,6040
1997,1916,1916,1916,1916,1916
1998,507,507,507,507,507
1999,2439,2439,2439,2439,2439
2000,10061,10061,10061,10061,10061
2001,3922,3922,3922,3922,3922
2002,3478,3478,3478,3478,3478
2003,4014,4014,4014,4014,4014
2004,3279,3279,3279,3279,3279
2005,5813,5813,5813,5813,5813
