# Data Wrangling

## UV Index (Historical)

In [15]:
import pandas as pd
import glob

# Getting all CSV file
files = glob.glob(r"Datasets/uv-melbourne-20*.csv")

# Defining a function to read and standardize column names
def read_and_standardize(file):
    df = pd.read_csv(file)
    
    # Standardizing the column names
    df.columns = df.columns.str.strip().str.lower().str.replace(" ", "_")  

    # Renaming date columns to timestamp
    for col in df.columns:
        if "date" in col or "time" in col:
            df.rename(columns={col: "date_time"}, inplace=True)
    
    return df


# Reading and processing each file
uv_hist_df = pd.concat([read_and_standardize(f) for f in files], ignore_index=True)
uv_hist_df.head()


Unnamed: 0,date_time,lat,lon,uv_index
0,2019-01-01 00:00:00,-37.73,145.1,0.01
1,2019-01-01 00:01:00,-37.73,145.1,0.01
2,2019-01-01 00:02:00,-37.73,145.1,0.01
3,2019-01-01 00:03:00,-37.73,145.1,0.01
4,2019-01-01 00:04:00,-37.73,145.1,0.01


In [16]:
uv_hist_df['state_id'] = "STATE07"
uv_hist_df = uv_hist_df[['date_time', 'state_id', 'lat', 'lon', 'uv_index']]

uv_hist_df

Unnamed: 0,date_time,state_id,lat,lon,uv_index
0,2019-01-01 00:00:00,STATE07,-37.73,145.1,0.01
1,2019-01-01 00:01:00,STATE07,-37.73,145.1,0.01
2,2019-01-01 00:02:00,STATE07,-37.73,145.1,0.01
3,2019-01-01 00:03:00,STATE07,-37.73,145.1,0.01
4,2019-01-01 00:04:00,STATE07,-37.73,145.1,0.01
...,...,...,...,...,...
8570556,2014-12-31 23:55:00,STATE07,-37.73,145.1,0.01
8570557,2014-12-31 23:56:00,STATE07,-37.73,145.1,0.02
8570558,2014-12-31 23:57:00,STATE07,-37.73,145.1,0.02
8570559,2014-12-31 23:58:00,STATE07,-37.73,145.1,0.01


In [17]:
uv_hist_df['uv_index_id'] = ['UVI{:02d}'.format(i+1) for i in range(len(uv_hist_df))]

uv_hist_df = uv_hist_df[['uv_index_id', 'state_id', 'date_time', 'lat',
                         'lon', 'uv_index']]

uv_hist_df

Unnamed: 0,uv_index_id,state_id,date_time,lat,lon,uv_index
0,UVI01,STATE07,2019-01-01 00:00:00,-37.73,145.1,0.01
1,UVI02,STATE07,2019-01-01 00:01:00,-37.73,145.1,0.01
2,UVI03,STATE07,2019-01-01 00:02:00,-37.73,145.1,0.01
3,UVI04,STATE07,2019-01-01 00:03:00,-37.73,145.1,0.01
4,UVI05,STATE07,2019-01-01 00:04:00,-37.73,145.1,0.01
...,...,...,...,...,...,...
8570556,UVI8570557,STATE07,2014-12-31 23:55:00,-37.73,145.1,0.01
8570557,UVI8570558,STATE07,2014-12-31 23:56:00,-37.73,145.1,0.02
8570558,UVI8570559,STATE07,2014-12-31 23:57:00,-37.73,145.1,0.02
8570559,UVI8570560,STATE07,2014-12-31 23:58:00,-37.73,145.1,0.01


In [18]:
# Exporting to CSV
uv_hist_df.to_csv("uv_historical.csv")