In [2]:
# import the library
import pandas as pd

In [3]:
# Load the hourly sample series. At this point "dt" is read as plain text
# (object dtype); it is parsed into real datetimes in a later cell.
df = pd.read_csv("src/datetime_df.csv")
# Preview the first five rows
df.head()

Unnamed: 0,dt,values
0,2022-01-01 00:00:00,100.86787
1,2022-01-01 01:00:00,100.950371
2,2022-01-01 02:00:00,100.015876
3,2022-01-01 03:00:00,97.719805
4,2022-01-01 04:00:00,98.95258


In [4]:
# Inspect the column types: "dt" is still stored as text (object), not as a datetime
df.dtypes

dt         object
values    float64
dtype: object

In [5]:
# Parse the text timestamps into real datetimes (pandas infers the format),
# then re-check the column types: "dt" is expected to be datetime64[ns] now.
df = df.assign(dt=pd.to_datetime(df["dt"]))
df.dtypes

dt        datetime64[ns]
values           float64
dtype: object

In [6]:
# Parse timestamps with an explicit format instead of letting pandas infer it.
# "dt" was already converted to datetime64 by the previous cell, and
# pd.to_datetime ignores `format` when its input is already datetime, so the
# column is rendered back to text first; this way the format string is really
# what drives the parsing. The displayed result is the same datetime64 Series.
pd.to_datetime(df["dt"].astype(str), format="%Y-%m-%d %H:%M:%S")

0      2022-01-01 00:00:00
1      2022-01-01 01:00:00
2      2022-01-01 02:00:00
3      2022-01-01 03:00:00
4      2022-01-01 04:00:00
               ...        
8732   2022-12-30 20:00:00
8733   2022-12-30 21:00:00
8734   2022-12-30 22:00:00
8735   2022-12-30 23:00:00
8736   2022-12-31 00:00:00
Name: dt, Length: 8737, dtype: datetime64[ns]

In [7]:
# Render every timestamp as day-first text (DD/MM/YYYY HH:MM:SS).
# The result is a Series of plain strings (dtype object), not datetimes.
day_first_format = "%d/%m/%Y %H:%M:%S"
df["dt"].dt.strftime(day_first_format)

0       01/01/2022 00:00:00
1       01/01/2022 01:00:00
2       01/01/2022 02:00:00
3       01/01/2022 03:00:00
4       01/01/2022 04:00:00
               ...         
8732    30/12/2022 20:00:00
8733    30/12/2022 21:00:00
8734    30/12/2022 22:00:00
8735    30/12/2022 23:00:00
8736    31/12/2022 00:00:00
Name: dt, Length: 8737, dtype: object

In [8]:
# Break each timestamp into its calendar and clock components,
# adding one new column per component through the .dt accessor.
df = df.assign(
    year=df["dt"].dt.year,
    month=df["dt"].dt.month,
    day=df["dt"].dt.day,
    weekday=df["dt"].dt.dayofweek,  # Monday is 0, Sunday is 6
    hour=df["dt"].dt.hour,
    minute=df["dt"].dt.minute,
    second=df["dt"].dt.second,
)

In [9]:
# Preview the frame, now including the date/time component columns added in the previous cell
df.head()

Unnamed: 0,dt,values,year,month,day,weekday,hour,minute,second
0,2022-01-01 00:00:00,100.86787,2022,1,1,5,0,0,0
1,2022-01-01 01:00:00,100.950371,2022,1,1,5,1,0,0
2,2022-01-01 02:00:00,100.015876,2022,1,1,5,2,0,0
3,2022-01-01 03:00:00,97.719805,2022,1,1,5,3,0,0
4,2022-01-01 04:00:00,98.95258,2022,1,1,5,4,0,0


In [10]:
# Fixed offsets expressed as Timedelta objects: one day and ten hours
days_1 = pd.to_timedelta(1, unit="d")
hours_10 = pd.to_timedelta(10, unit="h")

# Shift every timestamp by a constant offset:
# "dt_1d" is one day earlier, "dt_10h" is ten hours later.
df = df.assign(
    dt_1d=df["dt"] - days_1,
    dt_10h=df["dt"] + hours_10,
)

df.head()

Unnamed: 0,dt,values,year,month,day,weekday,hour,minute,second,dt_1d,dt_10h
0,2022-01-01 00:00:00,100.86787,2022,1,1,5,0,0,0,2021-12-31 00:00:00,2022-01-01 10:00:00
1,2022-01-01 01:00:00,100.950371,2022,1,1,5,1,0,0,2021-12-31 01:00:00,2022-01-01 11:00:00
2,2022-01-01 02:00:00,100.015876,2022,1,1,5,2,0,0,2021-12-31 02:00:00,2022-01-01 12:00:00
3,2022-01-01 03:00:00,97.719805,2022,1,1,5,3,0,0,2021-12-31 03:00:00,2022-01-01 13:00:00
4,2022-01-01 04:00:00,98.95258,2022,1,1,5,4,0,0,2021-12-31 04:00:00,2022-01-01 14:00:00


In [11]:
# Subtracting one datetime Series from another with "-" yields a Series of
# Timedeltas: the time elapsed between the two values on each row.
elapsed_time = df["dt_10h"] - df["dt"]
print("Elapsed time between dt and dt_10h:", elapsed_time, "", sep="\n")

# A Timedelta exposes its components: .dt.days is the whole-day part and
# .dt.seconds is the remainder within the day (0 to 86399). .dt.seconds is
# therefore only the full duration while the delta is shorter than one day
# (as it is here); use .dt.total_seconds() when the total is needed.
print("Elapsed time in days:", elapsed_time.dt.days, "", sep="\n")
print("Elapsed time in seconds:", elapsed_time.dt.seconds, sep="\n")

Elapsed time between dt and dt_10h:
0      0 days 10:00:00
1      0 days 10:00:00
2      0 days 10:00:00
3      0 days 10:00:00
4      0 days 10:00:00
             ...      
8732   0 days 10:00:00
8733   0 days 10:00:00
8734   0 days 10:00:00
8735   0 days 10:00:00
8736   0 days 10:00:00
Length: 8737, dtype: timedelta64[ns]

Elapsed time in days:
0       0
1       0
2       0
3       0
4       0
       ..
8732    0
8733    0
8734    0
8735    0
8736    0
Length: 8737, dtype: int64

Elapsed time in seconds:
0       36000
1       36000
2       36000
3       36000
4       36000
        ...  
8732    36000
8733    36000
8734    36000
8735    36000
8736    36000
Length: 8737, dtype: int32


In [12]:
# Switch to a second dataset made of free-text descriptions.
# NOTE: this rebinds `df`, so the datetime frame built in the earlier cells
# is no longer reachable under that name from here on.
df = pd.read_csv("src/string_df.csv")
# Preview the first five rows
df.head()

Unnamed: 0,description
0,Scents of smoke and aged honey entice on the n...
1,"This bottling, which is Randall Grahm's hommag..."
2,"For an easy and aromatic Sauvignon, Ermacora d..."
3,A 65-35 blend of Sangiovese and Cabernet Sauvi...
4,Like all of Beringer's 2001 gold letter single...


In [13]:
# Lowercase every description.
# The result is only displayed, not assigned back, so df["description"] keeps its original casing.
df["description"].str.lower()

0     scents of smoke and aged honey entice on the n...
1     this bottling, which is randall grahm's hommag...
2     for an easy and aromatic sauvignon, ermacora d...
3     a 65-35 blend of sangiovese and cabernet sauvi...
4     like all of beringer's 2001 gold letter single...
                            ...                        
95    a very nice wine, lush and drinkable, that dem...
96    leather, smashed berries and juniper reveal th...
97    a simple wine that straddles the balance betwe...
98    one of australia's most expensive and sought-a...
99    thick and luscious with deep oak tones and big...
Name: description, Length: 100, dtype: object

In [14]:
# Replace each occurrence of a substring.
# Matching is case-sensitive and whitespace counts: " and " only matches the
# word when it has a space on both sides, and those spaces are replaced too.
# regex=False pins plain-text matching explicitly, because the default value
# of `regex` differs between pandas versions (True before 2.0, False since).
df["description"].str.replace(" and ", "&", regex=False)

0     Scents of smoke&aged honey entice on the nose ...
1     This bottling, which is Randall Grahm's hommag...
2     For an easy&aromatic Sauvignon, Ermacora deliv...
3     A 65-35 blend of Sangiovese&Cabernet Sauvignon...
4     Like all of Beringer's 2001 gold letter single...
                            ...                        
95    A very nice wine, lush&drinkable, that demonst...
96    Leather, smashed berries&juniper reveal the ag...
97    A simple wine that straddles the balance betwe...
98    One of Australia's most expensive&sought-after...
99    Thick&luscious with deep oak tones&big cherry ...
Name: description, Length: 100, dtype: object

In [15]:
# Slicing through the .str[...] indexer, which follows Python's slice syntax
print(
    df["description"].str[3:],   # everything after the first 3 characters
    "",                          # blank line between the two results
    df["description"].str[:10],  # only the first 10 characters
    sep="\n",
)

0     nts of smoke and aged honey entice on the nose...
1     s bottling, which is Randall Grahm's hommage t...
2      an easy and aromatic Sauvignon, Ermacora deli...
3     5-35 blend of Sangiovese and Cabernet Sauvigno...
4     e all of Beringer's 2001 gold letter single-vi...
                            ...                        
95    ery nice wine, lush and drinkable, that demons...
96    ther, smashed berries and juniper reveal the a...
97    imple wine that straddles the balance between ...
98     of Australia's most expensive and sought-afte...
99    ck and luscious with deep oak tones and big ch...
Name: description, Length: 100, dtype: object

0     Scents of 
1     This bottl
2     For an eas
3     A 65-35 bl
4     Like all o
         ...    
95    A very nic
96    Leather, s
97    A simple w
98    One of Aus
99    Thick and 
Name: description, Length: 100, dtype: object


In [16]:
# Length of each description, in characters
df["desc_len"] = df["description"].str.len()

# True where the description begins with the text "This"
df["desc_starts_with_This"] = df["description"].str.startswith("This")

# True where "This" appears anywhere in the description (case-sensitive)
df["desc_contains_This"] = df["description"].str.contains("This")

# Number of occurrences of the pattern "This" in each description
df["desc_count_This"] = df["description"].str.count("This")

# Split each description into a list of pieces, using "," as the separator
df["desc_split"] = df["description"].str.split(",")

# Word count: split on single spaces, then take the length of each list.
# .str.len() also works on list elements and, unlike .apply(len), it keeps a
# missing description as a missing value instead of raising a TypeError.
# Caveat: consecutive spaces produce empty strings, which are counted as words.
df["desc_wordcount"] = df["description"].str.split(" ").str.len()

df.head()

Unnamed: 0,description,desc_len,desc_starts_with_This,desc_contains_This,desc_count_This,desc_split,desc_wordcount
0,Scents of smoke and aged honey entice on the n...,199,False,False,0,[Scents of smoke and aged honey entice on the ...,35
1,"This bottling, which is Randall Grahm's hommag...",522,True,True,1,"[This bottling, which is Randall Grahm's homm...",84
2,"For an easy and aromatic Sauvignon, Ermacora d...",223,False,False,0,"[For an easy and aromatic Sauvignon, Ermacora...",38
3,A 65-35 blend of Sangiovese and Cabernet Sauvi...,203,False,True,1,[A 65-35 blend of Sangiovese and Cabernet Sauv...,32
4,Like all of Beringer's 2001 gold letter single...,383,False,False,0,[Like all of Beringer's 2001 gold letter singl...,64
