In [1]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
import seaborn as sns

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


## Time Series 
####  - A time series is any quantity that is observed or measured at many points in time.
####  - Python provides several libraries for working with dates and times, as illustrated below.

In [2]:
from datetime import datetime, timedelta

# Snapshot of the machine's local clock,
# e.g. datetime.datetime(2024, 4, 17, 21, 39, 24, 951653)
now = datetime.now()

# The calendar fields are exposed as plain integer attributes
now.year   # e.g. 2024
now.month  # e.g. 4
now.day    # e.g. 17

# .date() keeps only the calendar part (year, month, day)
now.date()  # e.g. datetime.date(2024, 4, 17)

# .time() keeps only the clock part; being the last expression of the cell,
# this is the value the notebook displays
now.time()

datetime.time(21, 35, 57, 113447)

### - In case we want to perform arithmetic on dates, we can use `timedelta`

In [3]:
# Subtracting one datetime from another yields a timedelta
alpha = datetime(year=2023, month=1, day=1)
omega = datetime(year=2023, month=12, day=31)

# alpha is the earlier date, so "alpha - omega" comes out negative
delta = alpha - omega
delta  # timedelta(days=-364); "omega - alpha" would give +364 days

datetime.timedelta(days=-364)

In [4]:
# A timedelta can be added to (or subtracted from) a datetime to shift it
start = datetime(year=2023, month=1, day=1)

# Move ten days forward; days is the first argument of timedelta
stop = start + timedelta(days=10)

stop  # datetime.datetime(2023, 1, 11, 0, 0)

datetime.datetime(2023, 1, 11, 0, 0)

### Conversion between strings and date
####  - Datetime objects or Timestamps in Pandas can be formatted as strings using "str" or the "strftime" method.
####  - This is illustrated below

In [5]:
from time import strftime  # NOTE(review): unused in this cell; kept in case other cells rely on it


# Today's calendar date; every other value in this cell is derived from it,
# so the cell does not depend on variables left over from earlier cells
today = datetime.now().date()
today # Gives e.g. datetime.date(2024, 4, 17)

# Converting the date to str (stored under a new name so "today" stays a date)
today_str = str(today) # e.g. "2024-04-17"
type(today_str) # Gives str


#__________
# Using strftime method to convert dates into "str"
yesterday = today - timedelta(days=1)
yesterday_str = yesterday.strftime("%Y-%m-%d") # year-month-day, matching the strptime format below
yesterday_str

#________
# OR we could convert "str" to date objects
tomorrow_str = str(today + timedelta(days=1))
new_date = datetime.strptime(tomorrow_str, "%Y-%m-%d") # midnight at the start of tomorrow
new_date

datetime.datetime(2024, 4, 18, 0, 0)

#### - datetime.strptime() is a reliable way to convert strings to dates. However, you need to specify the expected format every time.
#### - To save yourself from this, you can use "parse", a function from "dateutil.parser", which infers the format for you.
#### - This is illustrated below.

In [6]:
from dateutil.parser import parse

# parse() works out the layout of an ISO-style string without a format argument
some_date = parse("2024-12-31")
some_date  # datetime.datetime(2024, 12, 31, 0, 0)

# Human-friendly spellings, such as an abbreviated month name, are understood too
some_format = "Jan 31 2024"
converted_format = parse(some_format)
converted_format  # datetime.datetime(2024, 1, 31, 0, 0)

# Some numeric dates put the day before the month, e.g. 6/12/2024 meaning
# 6 December. For those, pass dayfirst=True so the first number is read as the day.
sample_date = "17/04/2024"
new_sample_date1 = parse(sample_date, dayfirst=True)
new_sample_date1

datetime.datetime(2024, 4, 17, 0, 0)

## Working with Dates in Pandas 
#### - Pandas provides a function that converts many kinds of date representations into datetime objects: "pd.to_datetime()"

In [15]:
# ISO-formatted date strings, one New Year's Day per year
date_strings = [
    "2010-01-01",
    "2011-01-01",
    "2012-01-01",
]

# pd.to_datetime turns the list of strings into a DatetimeIndex
converted_dates = pd.to_datetime(date_strings)
converted_dates

DatetimeIndex(['2010-01-01', '2011-01-01', '2012-01-01'], dtype='datetime64[ns]', freq=None)

## Indexing, Selecting, and Subsetting 
#### - A time series behaves like any other pandas Series when it comes to selecting data based on labels

In [21]:
# 100 daily observations starting on 2010-01-01.
# The values come from a seeded generator so that the series (and every
# output derived from it) is identical after Restart Kernel -> Run All.
ts_data = pd.Series(
    np.random.default_rng(seed=42).standard_normal(100),
    index=pd.date_range("2010-01-01", periods=100),
)

# Accessing a single observation by its date label
stamp = "2010-01-01"
ts_data[stamp]

-0.32045024265613853

In [27]:
# Preview the first five observations of the series
ts_data.head(n=5)

2010-01-01   -0.320450
2010-01-02   -0.045666
2010-01-03    0.058494
2010-01-04    1.425877
2010-01-05   -1.024264
Freq: D, dtype: float64

In [25]:
# A full date string selects a single observation
ts_data.loc["20100202"]  # e.g. 0.09856160523919853

# A partial string, such as a year or a year plus a month, selects every matching row
ts_data.loc["2010-04"]  # all observations in April 2010

2010-04-01    0.783298
2010-04-02   -1.390276
2010-04-03   -0.649710
2010-04-04   -0.066595
2010-04-05    1.311498
2010-04-06   -1.269972
2010-04-07    0.426320
2010-04-08    0.247867
2010-04-09    0.195976
2010-04-10   -0.689576
Freq: D, dtype: float64

### Slicing with dates works as with regular Series 



In [32]:
# Label-based slices include both endpoints, so this covers all of January 2010
ts_data.loc["2010-01-01":"2010-01-31"]

2010-01-01   -0.320450
2010-01-02   -0.045666
2010-01-03    0.058494
2010-01-04    1.425877
2010-01-05   -1.024264
2010-01-06    0.349630
2010-01-07    0.915598
2010-01-08   -0.229859
2010-01-09    0.094757
2010-01-10    1.720054
2010-01-11    0.815785
2010-01-12   -1.618623
2010-01-13   -0.320470
2010-01-14    0.729197
2010-01-15   -0.418456
2010-01-16    0.121967
2010-01-17   -0.165328
2010-01-18   -0.250537
2010-01-19    0.835185
2010-01-20    1.203273
2010-01-21    2.229024
2010-01-22   -0.324681
2010-01-23   -0.814178
2010-01-24    0.830890
2010-01-25   -0.297994
2010-01-26    0.319386
2010-01-27   -0.332036
2010-01-28   -1.027967
2010-01-29    0.439827
2010-01-30    1.321640
2010-01-31   -0.583149
Freq: D, dtype: float64

### The same applies if we are working with a DataFrame

In [35]:
# 50 daily rows of random values, one column per city.
# A seeded generator keeps the frame identical across kernel restarts.
# The column label "Mombassa" is kept as spelled because a later cell selects by it.
data_df = pd.DataFrame(
    data=np.random.default_rng(seed=42).standard_normal((50, 4)),
    index=pd.date_range("2024-01-01", periods=50),
    columns=["Nairobi", "Nakuru", "Mombassa", "Kisumu"],
)

data_df.head()

Unnamed: 0,Nairobi,Nakuru,Mombassa,Kisumu
2024-01-01,0.185666,-0.356636,-0.258443,0.906677
2024-01-02,-0.995455,0.411399,0.055577,-1.151458
2024-01-03,0.088252,0.730981,-1.057109,0.521861
2024-01-04,-0.784926,-2.022647,0.247708,0.120709
2024-01-05,1.607078,0.741018,0.268835,-0.0589


In [38]:
# Row slice by date labels: every row in January 2024 (both endpoints included)
data_df.loc["20240101":"20240131"]

# .loc can slice rows and columns by label in one step:
# the first ten days, columns Nairobi through Mombassa
data_df.loc["20240101":"20240110", "Nairobi":"Mombassa"]

Unnamed: 0,Nairobi,Nakuru,Mombassa
2024-01-01,0.185666,-0.356636,-0.258443
2024-01-02,-0.995455,0.411399,0.055577
2024-01-03,0.088252,0.730981,-1.057109
2024-01-04,-0.784926,-2.022647,0.247708
2024-01-05,1.607078,0.741018,0.268835
2024-01-06,1.361889,-0.498457,0.441342
2024-01-07,1.146457,1.965574,-1.362441
2024-01-08,-2.24917,-0.120869,-0.906196
2024-01-09,-0.690384,0.625545,0.58183
2024-01-10,-0.914906,-0.810188,1.697702
