In [1]:
import pandas as pd
import numpy as np

In [31]:
# Load the raw VIX history; the path is relative to the working directory.
raw_csv_path = 'Datasetsv2/VIX_History.csv'
VIX_Data = pd.read_csv(raw_csv_path)

In [32]:
# Null check: flag every column that holds at least one null, then collapse
# the per-column flags into a single boolean for the whole frame.
VIX_Data_missing = VIX_Data.isna().any(axis=0).any()
print(f"Are there any missing values? {VIX_Data_missing}")

Are there any missing values? False


In [33]:
# Preview the first five rows of the file as loaded.
VIX_Data.head(n=5)

Unnamed: 0,DATE,OPEN,HIGH,LOW,CLOSE
0,01/02/1990,17.24,17.24,17.24,17.24
1,01/03/1990,18.19,18.19,18.19,18.19
2,01/04/1990,19.22,19.22,19.22,19.22
3,01/05/1990,20.11,20.11,20.11,20.11
4,01/08/1990,20.26,20.26,20.26,20.26


In [34]:
# Discard the OPEN/HIGH/LOW columns; only the date and the close are used below.
VIX_Data = VIX_Data.drop(['OPEN', 'HIGH', 'LOW'], axis='columns')

In [35]:
# Confirm that only the date and close columns remain.
VIX_Data.head(n=5)

Unnamed: 0,DATE,CLOSE
0,01/02/1990,17.24
1,01/03/1990,18.19
2,01/04/1990,19.22
3,01/05/1990,20.11
4,01/08/1990,20.26


In [36]:
# Switch the remaining headers to the names used for the rest of the notebook.
column_renames = {'DATE': 'Date', 'CLOSE': 'VIX_Value'}
VIX_Data.rename(column_renames, axis='columns', inplace=True)

In [37]:
# Inspect the column dtypes before 'Date' is converted to datetime.
VIX_Data.dtypes

Date          object
VIX_Value    float64
dtype: object

In [38]:
# Convert 'Date' from text to datetime64.
# The file stores dates as zero-padded MM/DD/YYYY (e.g. '01/02/1990'), so the
# format is stated explicitly: month/day order is never guessed, and a row
# that does not match raises instead of being read under another convention.
VIX_Data['Date'] = pd.to_datetime(VIX_Data['Date'], format='%m/%d/%Y')

In [39]:
# Summary statistics as a sanity check on the parsed dates and the VIX values.
VIX_Data.describe()

Unnamed: 0,Date,VIX_Value
count,8843,8843.0
mean,2007-07-18 04:55:23.577971200,19.461931
min,1990-01-02 00:00:00,9.14
25%,1998-10-03 12:00:00,13.83
50%,2007-07-20 00:00:00,17.61
75%,2016-04-30 12:00:00,22.81
max,2025-01-15 00:00:00,82.69
std,,7.836868


In [40]:
# Check that 'Date' now displays as real dates rather than MM/DD/YYYY text.
VIX_Data.head(n=5)

Unnamed: 0,Date,VIX_Value
0,1990-01-02,17.24
1,1990-01-03,18.19
2,1990-01-04,19.22
3,1990-01-05,20.11
4,1990-01-08,20.26


In [41]:
# Expand the series to an unbroken daily calendar. Dates absent from the
# source file (1990-01-06, for example) are inserted and take the most recent
# earlier VIX_Value via forward fill.

# Every calendar day from the first to the last observed date.
all_dates = pd.date_range(start=VIX_Data['Date'].min(), end=VIX_Data['Date'].max())

VIX_Data = (
    VIX_Data
    .set_index('Date')
    # Inserted days appear with NaN in VIX_Value.
    .reindex(all_dates)
    # Carry the last known value forward into the inserted days.
    .assign(VIX_Value=lambda frame: frame['VIX_Value'].ffill())
    # The reindexed index is unnamed, so it comes back as a column called
    # 'index'; restore the 'Date' header.
    .reset_index()
    .rename(columns={'index': 'Date'})
)

In [42]:
# Check that the calendar gaps were filled (1990-01-06 should now be present).
VIX_Data.head(n=5)

Unnamed: 0,Date,VIX_Value
0,1990-01-02,17.24
1,1990-01-03,18.19
2,1990-01-04,19.22
3,1990-01-05,20.11
4,1990-01-06,20.11


In [46]:
# remove unwanted dates
# Two trims are applied:
#   * the trailing window start_date..end_date (inclusive at both ends);
#   * everything before history_start. The summary recorded further down
#     (3654 rows, earliest date 2015-01-09) was produced with this lower bound
#     in effect, but no remaining cell applies it, so a fresh Restart & Run All
#     would otherwise export the full history back to 1990.
start_date = '2025-01-11'
end_date = '2025-01-15'
history_start = pd.Timestamp('2015-01-09')

in_trailing_window = VIX_Data['Date'].between(start_date, end_date)
before_history_start = VIX_Data['Date'] < history_start
VIX_Data = VIX_Data[~(in_trailing_window | before_history_start)]

In [48]:
# Keep rows up to and including 2025-01-09.
# This replaces a positional "drop the last row": that form removes one more
# day each time the cell is re-executed and raises IndexError on an empty
# frame. A date cutoff gives the same result on the first run and is a no-op
# on any later run.
last_kept_date = pd.Timestamp('2025-01-09')
VIX_Data = VIX_Data[VIX_Data['Date'] <= last_kept_date]

In [49]:
# Check the last five rows to confirm where the trimmed series ends.
VIX_Data.tail(n=5)

Unnamed: 0,Date,VIX_Value
12787,2025-01-05,16.13
12788,2025-01-06,16.04
12789,2025-01-07,17.82
12790,2025-01-08,17.7
12791,2025-01-09,18.07


In [50]:
# Summary statistics of the trimmed frame, to check the final date span and row count before export.
VIX_Data.describe()

Unnamed: 0,Date,VIX_Value
count,3654,3654.0
mean,2020-01-09 12:00:00,18.17289
min,2015-01-09 00:00:00,9.14
25%,2017-07-10 06:00:00,13.28
50%,2020-01-09 12:00:00,16.255
75%,2022-07-10 18:00:00,21.265
max,2025-01-09 00:00:00,82.69
std,,7.19208


In [51]:
# Write the cleaned series to disk; index=False leaves the row index out of the file.
output_path = 'Datasetsv2/preprocessed/VIX_Data_Preprocessed.csv'
VIX_Data.to_csv(output_path, index=False)