<a href="https://colab.research.google.com/github/maushamkumar/Pandas/blob/main/data_and_time.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np

## Timestamp Object
Time stamps reference particular moments in time (eg, may 26, 2024 at 15:20)

## Creating Timestamp objects

In [3]:
# Creating a timestamp
pd.Timestamp('2024-05-26')

Timestamp('2024-05-26 00:00:00')

In [4]:
pd.Timestamp('2024, 05,26')

Timestamp('2024-05-01 00:00:00')

In [5]:
pd.Timestamp('2024')

Timestamp('2024-01-01 00:00:00')

In [6]:
# using text
pd.Timestamp('26th May, 2024')

Timestamp('2024-05-26 00:00:00')

In [8]:
# providing time also
pd.Timestamp('2024-05-26 15:20:30')

Timestamp('2024-05-26 15:20:30')

In [9]:
# Am and Pm

In [10]:
# python datetime object

In [12]:
import datetime as dt
dt.datetime(2024, 5, 26, 15, 20, 30)

datetime.datetime(2024, 5, 26, 15, 20, 30)

In [13]:
pd.Timestamp(dt.datetime(2024, 5, 26, 15, 20, 30))

Timestamp('2024-05-26 15:20:30')

In [14]:
x = pd.Timestamp(dt.datetime(2024, 5, 26, 15, 20, 30))

In [15]:
# Fetching attributes
x.year

2024

In [16]:
x.month

5

In [None]:
# why separate objects to handle data and time when python already has datetime functionality


* Syntax wise datetime is very convenient
* But the performance takes a hit while working wit huge data List vs NumPy array
* The weaknesses of Python's datetime format inspired the NumPy team to add a set of native time series data type to NumPy.
* The datetime64 dtype encodes dates as 64-bit integers, and thus allows arrays of dates to be represented very compaactly.

In [17]:
import numpy as np
date = np.array(['2024-05-26'], dtype=np.datetime64)

In [18]:
date + np.arange(12)

array(['2024-05-26', '2024-05-27', '2024-05-28', '2024-05-29',
       '2024-05-30', '2024-05-31', '2024-06-01', '2024-06-02',
       '2024-06-03', '2024-06-04', '2024-06-05', '2024-06-06'],
      dtype='datetime64[D]')

* Because of the uniform type in NumPy datetime4 arrays, this type of operation can be accomplished much more quickly than if we were working directly with Python's datetime objects, especially as arrays get large
* Pandas Timestamp object combines the ease-of-use of Python datetime with the efficient storage and vectorized interface of numpy datetime
* From a group of these Timestamp objects, Pandas can construct a Datetimeindex that can be used to index data in a Series or DataFrame

## DatetimeIndex Oject
A collection of pandas timestamp

In [19]:
# singe date ko store karne ke timestamp use or multiple timestamp ko store karne ke timestampindex use karte h
# from strings
pd.DatetimeIndex(['2024-05-26', '2024-05-27', '2024-05-28'])

DatetimeIndex(['2024-05-26', '2024-05-27', '2024-05-28'], dtype='datetime64[ns]', freq=None)

In [20]:
# Using Python datetime object
dt = [dt.datetime(2024, 5, 26), dt.datetime(2024, 5, 27), dt.datetime(2024, 5, 28)]
pd.DatetimeIndex(dt)

DatetimeIndex(['2024-05-26', '2024-05-27', '2024-05-28'], dtype='datetime64[ns]', freq=None)

In [23]:
# using pd.timestamp
dt_index = pd.DatetimeIndex([pd.Timestamp('2024-05-26'), pd.Timestamp('2024-05-27'), pd.Timestamp('2024-05-28')])

In [24]:
# using datetimeindex as series index
pd.Series([1, 2,3], index=dt_index)

2024-05-26    1
2024-05-27    2
2024-05-28    3
dtype: int64

## date_range function

In [28]:
# generate daily dates in a given range
pd.date_range(start='2024-05-20', end='2024-05-28', freq='D')

DatetimeIndex(['2024-05-20', '2024-05-21', '2024-05-22', '2024-05-23',
               '2024-05-24', '2024-05-25', '2024-05-26', '2024-05-27',
               '2024-05-28'],
              dtype='datetime64[ns]', freq='D')

In [29]:
pd.date_range(start='2024-05-20', end='2024-05-28', freq='2D')

DatetimeIndex(['2024-05-20', '2024-05-22', '2024-05-24', '2024-05-26',
               '2024-05-28'],
              dtype='datetime64[ns]', freq='2D')

In [30]:
pd.date_range(start='2024-05-20', end='2024-05-28', freq='B')

DatetimeIndex(['2024-05-20', '2024-05-21', '2024-05-22', '2024-05-23',
               '2024-05-24', '2024-05-27', '2024-05-28'],
              dtype='datetime64[ns]', freq='B')

In [31]:
# w -> one weak per day
pd.date_range(start='2024-05-20', end='2024-05-28', freq='w')


DatetimeIndex(['2024-05-26'], dtype='datetime64[ns]', freq='W-SUN')

In [32]:
# H -> Hourly data (factor)
pd.date_range(start='2024-05-20', end='2024-05-28', freq='H')

DatetimeIndex(['2024-05-20 00:00:00', '2024-05-20 01:00:00',
               '2024-05-20 02:00:00', '2024-05-20 03:00:00',
               '2024-05-20 04:00:00', '2024-05-20 05:00:00',
               '2024-05-20 06:00:00', '2024-05-20 07:00:00',
               '2024-05-20 08:00:00', '2024-05-20 09:00:00',
               ...
               '2024-05-27 15:00:00', '2024-05-27 16:00:00',
               '2024-05-27 17:00:00', '2024-05-27 18:00:00',
               '2024-05-27 19:00:00', '2024-05-27 20:00:00',
               '2024-05-27 21:00:00', '2024-05-27 22:00:00',
               '2024-05-27 23:00:00', '2024-05-28 00:00:00'],
              dtype='datetime64[ns]', length=193, freq='H')

In [36]:
# M -> Monthly data
pd.date_range(start='2024-03-20', end='2024-05-28', freq='M')

DatetimeIndex(['2024-03-31', '2024-04-30'], dtype='datetime64[ns]', freq='M')

In [35]:
# MS -> start of the month
pd.date_range(start='2024-04-20', end='2024-05-28', freq='MS')

DatetimeIndex(['2024-05-01'], dtype='datetime64[ns]', freq='MS')

In [37]:
# A -> yearly data
pd.date_range(start='2022-01-20', end='2024-05-28', freq='A')

DatetimeIndex(['2022-12-31', '2023-12-31'], dtype='datetime64[ns]', freq='A-DEC')

In [38]:
# usign periods (number of results)
pd.date_range(start='2024-05-20', periods=10, freq='D')

DatetimeIndex(['2024-05-20', '2024-05-21', '2024-05-22', '2024-05-23',
               '2024-05-24', '2024-05-25', '2024-05-26', '2024-05-27',
               '2024-05-28', '2024-05-29'],
              dtype='datetime64[ns]', freq='D')

# to_dateime function
convert an existing objects to pandas timestamp/datetimeindex object

In [41]:
# simple example
s = pd.Series(['2024-05-26', '2024-05-27', '2024-05-28'])
print(type(s))
pd.to_datetime(s)
pd.to_datetime(s).dt.year
pd.to_datetime(s).dt.month
pd.to_datetime(s).dt.day
pd.to_datetime(s).dt.weekday
pd.to_datetime(s).dt.day_name
pd.to_datetime(s).dt.month_name

<class 'pandas.core.series.Series'>


0   2024-05-26
1   2024-05-27
2   2024-05-28
dtype: datetime64[ns]

In [44]:
# with error
s = pd.Series(['2024-05-26', '2024-05-27', '2024-052-8'])
pd.to_datetime(s)

AttributeError: module 'pandas' has no attribute 'to_time'

In [47]:
pd.to_datetime(s, errors='coerce')

0   2024-05-26
1   2024-05-27
2          NaT
dtype: datetime64[ns]

In [42]:
df = pd.read_csv('/content/expense_data.csv')
df

Unnamed: 0,Date,Account,Category,Subcategory,Note,INR,Income/Expense,Note.1,Amount,Currency,Account.1
0,3/2/2022 10:11,CUB - online payment,Food,,Brownie,50.0,Expense,,50.0,INR,50.0
1,3/2/2022 10:11,CUB - online payment,Other,,To lended people,300.0,Expense,,300.0,INR,300.0
2,3/1/2022 19:50,CUB - online payment,Food,,Dinner,78.0,Expense,,78.0,INR,78.0
3,3/1/2022 18:56,CUB - online payment,Transportation,,Metro,30.0,Expense,,30.0,INR,30.0
4,3/1/2022 18:22,CUB - online payment,Food,,Snacks,67.0,Expense,,67.0,INR,67.0
...,...,...,...,...,...,...,...,...,...,...,...
272,11/22/2021 14:16,CUB - online payment,Food,,Dinner,90.0,Expense,,90.0,INR,90.0
273,11/22/2021 14:16,CUB - online payment,Food,,Lunch with company,97.0,Expense,,97.0,INR,97.0
274,11/21/2021 17:07,CUB - online payment,Transportation,,Rapido,130.0,Expense,,130.0,INR,130.0
275,11/21/2021 15:50,CUB - online payment,Food,,Lunch,875.0,Expense,,875.0,INR,875.0


In [48]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 277 entries, 0 to 276
Data columns (total 11 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Date            277 non-null    object 
 1   Account         277 non-null    object 
 2   Category        277 non-null    object 
 3   Subcategory     0 non-null      float64
 4   Note            273 non-null    object 
 5   INR             277 non-null    float64
 6   Income/Expense  277 non-null    object 
 7   Note.1          0 non-null      float64
 8   Amount          277 non-null    float64
 9   Currency        277 non-null    object 
 10  Account.1       277 non-null    float64
dtypes: float64(5), object(6)
memory usage: 23.9+ KB
