In [1]:
# Import required libraries
import pandas as pd
import plotly.express as px
import plotly.io as pio
import matplotlib.pyplot as plt
import seaborn as sns

# Set plotly's default renderer so figures display inline in the notebook
pio.renderers.default = "notebook"

# Fallback if figures do not render inline: switch to the "browser" renderer
# pio.renderers.default = "browser"

In [2]:
# Read the dataset
file_path = '../datasets_cleaned/Economy/Export_of_Goods_&_Services.csv'

# Read the CSV file (no date parsing requested, so Date is loaded as-is)
df = pd.read_csv(file_path)

# Convert Date column to datetime. The explicit format means any value that
# is not YYYY-MM-DD raises here instead of being guessed at.
df['Date'] = pd.to_datetime(df['Date'], format='%Y-%m-%d')

# The frame is deliberately NOT written back to file_path:
#   * the strict parse above only succeeds when the file already stores
#     YYYY-MM-DD dates, so rewriting it would not change the Date column;
#   * CSV is plain text and cannot carry a datetime dtype anyway - the dtype
#     is restored at read time via parse_dates;
#   * overwriting the input on every run re-serialises all other columns and
#     may leave a truncated dataset behind if the write is interrupted.

print("Original conversion:")
print(df['Date'].dtype)
print("\nFirst few dates:")
print(df['Date'].head())

# Validate by reading again, this time letting read_csv parse the column
df_validate = pd.read_csv(file_path, parse_dates=['Date'])
print("\nValidation after reading again:")
print(df_validate['Date'].dtype)
print("\nFirst few dates after reloading:")
print(df_validate['Date'].head())

# parse_dates leaves a column as object when it cannot parse it, so fail
# loudly rather than relying on someone reading the printed dtype.
assert pd.api.types.is_datetime64_any_dtype(df_validate['Date']), \
    "Date column was not parsed as datetime when reloading the CSV"

Original conversion:
datetime64[ns]

First few dates:
0   2005-07-31
1   2005-08-31
2   2005-09-30
3   2005-10-31
4   2005-11-30
Name: Date, dtype: datetime64[ns]

Validation after reading again:
datetime64[ns]

First few dates after reloading:
0   2005-07-31
1   2005-08-31
2   2005-09-30
3   2005-10-31
4   2005-11-30
Name: Date, dtype: datetime64[ns]


In [3]:
# Fresh read of the same CSV with Date parsed on load, followed by a
# structural summary (row count, non-null counts and dtype per column)
validate = pd.read_csv(
    file_path,
    parse_dates=['Date'],
)
validate.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 243 entries, 0 to 242
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   Date         243 non-null    datetime64[ns]
 1   Series_Name  243 non-null    object        
 2   Value        243 non-null    float64       
 3   Unit         243 non-null    object        
dtypes: datetime64[ns](1), float64(1), object(2)
memory usage: 7.7+ KB


# Understanding parse_dates in pandas

The `parse_dates` parameter in `pd.read_csv()`:
- Automatically converts specified columns to datetime when reading a CSV file
- Can be used in different ways:
  1. As a list of column names: `parse_dates=['Date']`
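  2. As a boolean: `parse_dates=True` (tries to parse the index as dates; it does not scan every column)
  3. As a dictionary: `parse_dates={'date_col': ['year', 'month', 'day']}` (combines the listed columns into a single datetime column named `date_col`; this form is deprecated since pandas 2.2 — call `pd.to_datetime` on the loaded columns instead)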

In [4]:
# Compare the dtype of the Date column with and without parse_dates
file_path = '../datasets_cleaned/Economy/Export_of_Goods_&_Services.csv'

# Method 1: plain read, no date handling requested
df1 = pd.read_csv(file_path)
print("Without parse_dates:", df1['Date'].dtype, sep="\n")

# Method 2: same file, but ask read_csv to convert Date while loading
df2 = pd.read_csv(file_path, parse_dates=['Date'])
print("\nWith parse_dates=['Date']:", df2['Date'].dtype, sep="\n")

# First two Date values from each frame, one after the other
print("\nSample dates comparison:")
for caption, frame in (("Without parse_dates:", df1), ("With parse_dates:", df2)):
    print(caption, frame['Date'].head(2))

Without parse_dates:
object

With parse_dates=['Date']:
datetime64[ns]

Sample dates comparison:
Without parse_dates: 0    2005-07-31
1    2005-08-31
Name: Date, dtype: object
With parse_dates: 0   2005-07-31
1   2005-08-31
Name: Date, dtype: datetime64[ns]


In [12]:
# Create time series plot
# NOTE(review): "Million USD" is hard-coded below; the dataset also carries a
# `Unit` column - confirm the two agree.
fig = px.line(
    df2,
    x='Date',
    y='Value',
    title='Export of Goods & Services Over Time',
    # `labels` is the single source for axis titles. The x-axis keeps the
    # column name "Date", so only the y-axis needs a friendlier label.
    # (Previously the labels given here were overridden by separate
    # xaxis_title / yaxis_title settings and never displayed.)
    labels={'Value': 'Export Value (Million USD)'},
    template='plotly_white',
)

# Layout: hide the legend and show one combined hover box per x position
fig.update_layout(
    showlegend=False,
    hovermode='x unified',
)

# Update hover template to show units. The value uses an explicit number
# format (thousands separator, two decimals) instead of an empty specifier.
fig.update_traces(
    hovertemplate="<b>Date</b>: %{x}<br>" +
                  "<b>Value</b>: %{y:,.2f} Million USD<br>" +
                  "<extra></extra>"
)

# Show the plot
fig.show()