In [22]:
import numpy as np
import pandas as pd
from datetime import datetime

In [23]:
# Read the raw dataset from the working directory and preview it.
raw_csv_path = "Data-GP1-1.csv"
df = pd.read_csv(raw_csv_path)
df

Unnamed: 0,Mon,Tue,Wed,Thu,Date,Stormy,Mixed,p,q,Rainy,Cold,Wind
0,1,0,0,0,911202,1,0,-0.430783,8.994421,1,0,2.995732
1,0,1,0,0,911203,1,0,0.000000,7.707063,0,0,2.995732
2,0,0,1,0,911204,0,1,0.072321,8.350194,1,1,2.813411
3,0,0,0,1,911205,1,0,0.247139,8.656955,0,1,3.036554
4,0,0,0,0,911206,1,0,0.664327,7.844241,0,1,3.036554
...,...,...,...,...,...,...,...,...,...,...,...,...
106,1,0,0,0,920504,0,0,-0.798508,8.610683,0,0,2.862201
107,0,1,0,0,920505,0,1,-0.087011,7.162397,0,0,2.908721
108,0,0,1,0,920506,0,1,0.184922,7.362010,0,0,2.862201
109,0,0,0,1,920507,0,1,0.223143,8.764053,0,0,2.813411


In [24]:
# Remember the column order of the raw file so the derived columns can be
# slotted in around the existing ones further down.
og_columns = df.columns
weekday_list = ["Mon","Tue","Wed","Thu","Fri","Sat","Sun"]

# Add placeholder one-hot columns for the weekdays the raw file has no
# column for; they are left at 0 in this cell.
for day in ["Fri", "Sat", "Sun"]:
    df[day] = 0

# Parse the YYMMDD value once. Zero-padding to six characters keeps the
# year/month/day fields aligned even when the two-digit year starts with 0
# (e.g. 10203 -> "010203"); slicing a plain astype(str) would shift them.
date_parsed = pd.to_datetime(
    df["Date"].astype(str).str.zfill(6), format="%y%m%d"
)

# weekday() is 0 for Monday, which matches the order of weekday_list.
df["Day"] = date_parsed.dt.weekday.map(dict(enumerate(weekday_list)))

# Split the date into zero-padded two-character strings.
# NOTE: "Date" is overwritten with the day of month here, so this cell
# cannot be re-run on the same frame without reloading the raw CSV first.
df["Year"] = date_parsed.dt.strftime("%y")
df["Month"] = date_parsed.dt.strftime("%m")
df["Date"] = date_parsed.dt.strftime("%d")

# Build the display order positionally: the first four original columns,
# the new weekday columns plus "Day", the fifth original column, then
# "Month"/"Year", then everything else.
# Assumes the raw file starts with the four weekday dummies followed by
# "Date", as in the preview of the loaded frame.
new_columns = (
    og_columns[:4].tolist()
    + ["Fri", "Sat", "Sun", "Day"]
    + [og_columns[4]]
    + ["Month", "Year"]
    + og_columns[5:].tolist()
)

# Reorder the columns
df = df.reindex(columns=new_columns)
df


Unnamed: 0,Mon,Tue,Wed,Thu,Fri,Sat,Sun,Day,Date,Month,Year,Stormy,Mixed,p,q,Rainy,Cold,Wind
0,1,0,0,0,0,0,0,Mon,02,12,91,1,0,-0.430783,8.994421,1,0,2.995732
1,0,1,0,0,0,0,0,Tue,03,12,91,1,0,0.000000,7.707063,0,0,2.995732
2,0,0,1,0,0,0,0,Wed,04,12,91,0,1,0.072321,8.350194,1,1,2.813411
3,0,0,0,1,0,0,0,Thu,05,12,91,1,0,0.247139,8.656955,0,1,3.036554
4,0,0,0,0,0,0,0,Fri,06,12,91,1,0,0.664327,7.844241,0,1,3.036554
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
106,1,0,0,0,0,0,0,Mon,04,05,92,0,0,-0.798508,8.610683,0,0,2.862201
107,0,1,0,0,0,0,0,Tue,05,05,92,0,1,-0.087011,7.162397,0,0,2.908721
108,0,0,1,0,0,0,0,Wed,06,05,92,0,1,0.184922,7.362010,0,0,2.862201
109,0,0,0,1,0,0,0,Thu,07,05,92,0,1,0.223143,8.764053,0,0,2.813411


In [26]:
# Update One-Hot Encoding for Fri, Sat and Sun from the date-derived "Day".
for day in ["Fri", "Sat", "Sun"]:
    df[day] = (df["Day"] == day).astype(int)

# Encoding implied by "Day": 1 in the column named by "Day", 0 elsewhere.
expected_encoding = pd.DataFrame(
    {day: (df["Day"] == day).astype(int) for day in weekday_list},
    index=df.index,
)

# Flag a row when any weekday dummy disagrees with the encoding implied by
# "Day", or when "Day" is not a recognised weekday at all. Checking only
# that the dummies sum to 1 would miss a row whose single set flag sits in
# the wrong column (e.g. Mon == 1 while "Day" is "Tue").
df["Date Error"] = (
    df[weekday_list].ne(expected_encoding).any(axis=1)
    | ~df["Day"].isin(weekday_list)
)

df

Unnamed: 0,Mon,Tue,Wed,Thu,Fri,Sat,Sun,Day,Date,Month,Year,Stormy,Mixed,p,q,Rainy,Cold,Wind,Date Error
0,1,0,0,0,0,0,0,Mon,02,12,91,1,0,-0.430783,8.994421,1,0,2.995732,False
1,0,1,0,0,0,0,0,Tue,03,12,91,1,0,0.000000,7.707063,0,0,2.995732,False
2,0,0,1,0,0,0,0,Wed,04,12,91,0,1,0.072321,8.350194,1,1,2.813411,False
3,0,0,0,1,0,0,0,Thu,05,12,91,1,0,0.247139,8.656955,0,1,3.036554,False
4,0,0,0,0,1,0,0,Fri,06,12,91,1,0,0.664327,7.844241,0,1,3.036554,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
106,1,0,0,0,0,0,0,Mon,04,05,92,0,0,-0.798508,8.610683,0,0,2.862201,False
107,0,1,0,0,0,0,0,Tue,05,05,92,0,1,-0.087011,7.162397,0,0,2.908721,False
108,0,0,1,0,0,0,0,Wed,06,05,92,0,1,0.184922,7.362010,0,0,2.862201,False
109,0,0,0,1,0,0,0,Thu,07,05,92,0,1,0.223143,8.764053,0,0,2.813411,False


In [28]:
# Select the rows whose "Date Error" flag is set.
flagged_mask = df["Date Error"]
incorrect_encoding = df[flagged_mask]

if not incorrect_encoding.empty:
    # Something is inconsistent: show the offending rows and skip the export.
    print(incorrect_encoding)
else:
    # The helper flag is not part of the dataset, so remove it before saving.
    # NOTE: once it is removed, re-running this cell fails on the lookup of
    # "Date Error" above until the flag column has been recomputed.
    df = df.drop(columns="Date Error")

    # Destination of the cleaned dataset, relative to the working directory.
    file_path = "./Data-GP1-1(updated).csv"

    # index=False keeps the DataFrame index out of the CSV file.
    df.to_csv(file_path, index=False)

    print(f"DataFrame saved as {file_path}")

DataFrame saved as ./Data-GP1-1(updated).csv


In [19]:
# Read the exported CSV back in to confirm it was written properly.
# This only works after the export step has saved the file; running it
# earlier raises FileNotFoundError.
saved_csv_path = "Data-GP1-1(updated).csv"
df = pd.read_csv(saved_csv_path)
df

FileNotFoundError: [Errno 2] No such file or directory: 'Data-GP1-1(updated).csv'