In [34]:
# import libraries

import os
import chardet
import pandas as pd
import numpy as np
import plotly.express as px


# set dataframe display 
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', 10000)
pd.set_option('display.max_rows', None)
pd.set_option('display.width', 10000)

In [33]:
# Rename files

SAVED_CHANGI_FILES = 'changi_csv_files'

changi_files = os.listdir(SAVED_CHANGI_FILES)

changi_files = sorted(changi_files)

month_map = {"jan": "01",
             "feb": "02",
             "mar": "03",
             "apr": "04",
             "may": "05",
             "jun": "06",
             "jul": "07",
             "aug": "08",
             "sep": "09",
             "oct": "10",
             "nov": "11",
             "dec": "12"}



for file in changi_files:  

    try:

        if file == ".DS_Store":
            continue
        else:
            # print(f"Before renaming file name: {file}") # debug

            year, month, locExtension = file.split("_")
            # print(year, month, locExtension)

            if month in month_map:
                renamed_file = f"{year}_{month_map[month]}_{month}_{locExtension}"
                # print(f"After renaming file: {renamed_file}") # debug
 

                old_file = os.path.join(SAVED_CHANGI_FILES, file)
                # print(f"Old file path: {old_file}") # debug


                new_file = os.path.join(SAVED_CHANGI_FILES, renamed_file)
                # print(f"New file path: {new_file}\n") # debug

                os.rename(old_file, new_file)

                print(f"File has been renamed from {old_file} to {new_file}")
    
    except Exception as e:
        print(e)

Before renaming file name: 1980_apr_changi.csv
After renaming file: 1980_04_apr_changi.csv
Old file path: changi_csv_files/1980_apr_changi.csv
New file path: changi_csv_files/1980_04_apr_changi.csv

File has been renamed from changi_csv_files/1980_apr_changi.csv to changi_csv_files/1980_04_apr_changi.csv
Before renaming file name: 1980_aug_changi.csv
After renaming file: 1980_08_aug_changi.csv
Old file path: changi_csv_files/1980_aug_changi.csv
New file path: changi_csv_files/1980_08_aug_changi.csv

File has been renamed from changi_csv_files/1980_aug_changi.csv to changi_csv_files/1980_08_aug_changi.csv
Before renaming file name: 1980_dec_changi.csv
After renaming file: 1980_12_dec_changi.csv
Old file path: changi_csv_files/1980_dec_changi.csv
New file path: changi_csv_files/1980_12_dec_changi.csv

File has been renamed from changi_csv_files/1980_dec_changi.csv to changi_csv_files/1980_12_dec_changi.csv
Before renaming file name: 1980_feb_changi.csv
After renaming file: 1980_02_feb_ch

In [40]:
# check for encoding

encoding_list = []

for file in os.listdir(SAVED_CHANGI_FILES):
    file_path = os.path.join(SAVED_CHANGI_FILES, file)

    with open(file_path, "rb") as f:
        data = f.read()

    encoding_result = chardet.detect(data)
    
    encoding = encoding_result["encoding"]

    encoding_list.append(encoding)

print(set(encoding_list))

{'ISO-8859-1', 'Windows-1252', 'UTF-8-SIG'}


In [49]:
# Rename each csv file to new headers

sample_df = pd.read_csv("changi_csv_files/1980_01_jan_changi.csv", encoding = "ISO-8859-1" )

column_headers_map = {"Station": "station",
                      "Year": "year",
                      "Month": "month",
                      "Day": "date",
                      "Daily Rainfall Total (mm)": "tot_daily_rf",
                      "Highest 30 Min Rainfall (mm)": "highest_30_min_rf",
                      "Highest 60 Min Rainfall (mm)": "highest_60_min_rf",
                      "Highest 120 Min Rainfall (mm)": "highest_120_min_rf",
                      "Mean Temperature (°C)": "mean_temp",
                      "Maximum Temperature (°C)": "max_temp",
                      "Minimum Temperature (°C)": "min_temp",
                      "Mean Wind Speed (km/h)": "mean_wind_speed",
                      "Max Wind Speed (km/h)": "max_win_speed"
                      }

print(list(sample_df.columns.values))

['Station', 'Year', 'Month', 'Day', 'Daily Rainfall Total (mm)', 'Highest 30 Min Rainfall (mm)', 'Highest 60 Min Rainfall (mm)', 'Highest 120 Min Rainfall (mm)', 'Mean Temperature (°C)', 'Maximum Temperature (°C)', 'Minimum Temperature (°C)', 'Mean Wind Speed (km/h)', 'Max Wind Speed (km/h)']


In [42]:
# append into df

files_df = []

for file in os.listdir(SAVED_CHANGI_FILES):
    
    file_path = os.path.join(SAVED_CHANGI_FILES, file)

    with open(file_path, "rb") as f:
        data = f.read()
    
    encoding_result = chardet.detect(data)

    encoding = encoding_result["encoding"]
    
    data_file = pd.read_csv(file_path, encoding = encoding)

    files_df.append(data_file)

print(files_df[:50])

[   Station  Year  Month  Day  Daily Rainfall Total (mm)  Highest 30 Min Rainfall (mm)  Highest 60 Min Rainfall (mm)  Highest 120 Min Rainfall (mm)  Mean Temperature (°C)  Maximum Temperature (°C)  Minimum Temperature (°C)  Mean Wind Speed (km/h)  Max Wind Speed (km/h)
0   Changi  2015     10    1                        0.0                           0.0                           0.0                            0.0                   28.7                      32.3                      27.2                    10.0                   37.4
1   Changi  2015     10    2                       28.6                          19.2                          19.6                           19.6                   27.5                      31.6                      23.5                     7.2                   43.9
2   Changi  2015     10    3                        0.0                           0.0                           0.0                            0.0                   28.5                      3