In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

In [5]:
# Define date range and create dummy dataset
num_rows = 100
start_date = datetime(2023, 1, 1, 8, 0)  # start date
date_range = [start_date + timedelta(minutes=np.random.randint(0, 100000)) for _ in range(num_rows)]

# Create the dummy dataset
data = {
    'date': [date.strftime('%m/%d/%Y %H:%M') for date in date_range],
    'value': np.random.rand(num_rows) * 100  # random values for additional context
}

# Convert to DataFrame
df = pd.DataFrame(data)

# Display the dataset
print(df.dtypes)
print(df.head(5))

date      object
value    float64
dtype: object
               date      value
0  03/02/2023 06:47  66.127092
1  02/24/2023 18:01  90.274018
2  02/10/2023 04:37  91.276608
3  01/22/2023 14:47  98.309156
4  03/03/2023 03:09  41.772164


So, in summary, .str.split(' ') splits the date strings into two parts (date and time), 
and .str[0] grabs the first part, which is the 'mm/dd/yyyy' format.

In [4]:
# Remove 'HH:MM' part and keep only 'mm/dd/yyyy'
# .str.split(' ') splits each string in the 'date' column at every occurrence of a space (' '). This results in each date string being split into a list of two elements: the first part (which is the 'mm/dd/yyyy') and the second part (which is the 'HH').
# .str[0] selects the first element of each list resulting from the split operation, which corresponds to the 'mm/dd/yyyy' part of the date.
df['date'] = df['date'].str.split(' ').str[0]

# Display the updated DataFrame
print(df.head(5))

         date      value
0  03/03/2023  19.913068
1  02/27/2023  12.913094
2  01/12/2023  98.541171
3  01/28/2023  78.216282
4  03/07/2023   0.788886


In [6]:
# Convert 'date' column to datetime64[ns]
df['date'] = pd.to_datetime(df['date'], format='%m/%d/%Y %H:%M')

# Verify the data types
print(df.dtypes)
# Display the first 5 rows to ensure the conversion is successful
print(df.head(5))

date     datetime64[ns]
value           float64
dtype: object
                 date      value
0 2023-03-02 06:47:00  66.127092
1 2023-02-24 18:01:00  90.274018
2 2023-02-10 04:37:00  91.276608
3 2023-01-22 14:47:00  98.309156
4 2023-03-03 03:09:00  41.772164


In [8]:
# Define date range and create dummy dataset
num_rows = 100
start_date = datetime(2023, 1, 1, 8, 0)  # start date
date_range = [start_date + timedelta(minutes=np.random.randint(0, 100000)) for _ in range(num_rows)]

# Create the dummy dataset
data = {
    # No need to format with strftime; just pass the datetime objects and use pd.to_datetime
    'date': pd.to_datetime(date_range).strftime('%m/%d/%Y %H:%M'),
    'value': np.random.rand(num_rows) * 100  # random values for additional context
}

# Convert to DataFrame
df = pd.DataFrame(data)

# Display the DataFrame
print(df.dtypes)
print(df.head())  # Display first few rows of the DataFrame

date      object
value    float64
dtype: object
               date      value
0  02/18/2023 21:33  19.655764
1  03/04/2023 07:28  73.184132
2  02/27/2023 12:15  25.915669
3  01/09/2023 17:31  47.817152
4  01/06/2023 10:03  91.059040


In [9]:
# Remove time part but keep as datetime objects (date only)
df['date'] = pd.to_datetime(df['date']).dt.date

# Display the first few rows
print(df.head())

         date      value
0  2023-02-18  19.655764
1  2023-03-04  73.184132
2  2023-02-27  25.915669
3  2023-01-09  47.817152
4  2023-01-06  91.059040


In [10]:
# Convert to string in 'mm/dd/yyyy' format
df['date'] = pd.to_datetime(df['date']).dt.strftime('%m/%d/%Y')

# Display the first few rows
print(df.head())

         date      value
0  02/18/2023  19.655764
1  03/04/2023  73.184132
2  02/27/2023  25.915669
3  01/09/2023  47.817152
4  01/06/2023  91.059040
