In [1]:
# File locations used by the notebook: the CSV that is loaded into a DataFrame
# and the CSV that the final table is written to.
input_file_path, output_file_path = (
    'SMI_Historical_Data(10Nov).csv',
    'SMI_log_returns(10Nov).csv',
)

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math

# Function to process CSV files
def process_csv(file_path):
    """Read a CSV file into a DataFrame and replace missing values with 0.

    Parameters
    ----------
    file_path
        Location of the CSV file; handed to ``pd.read_csv`` unchanged.

    Returns
    -------
    pandas.DataFrame or None
        The loaded frame with every NaN replaced by 0. If reading or filling
        raises any ``Exception``, the error is printed and ``None`` is
        returned instead of the exception propagating.
    """
    try:
        # Chained, non-in-place fill: the freshly read frame is local to this
        # function, so returning the filled copy is equivalent for callers.
        return pd.read_csv(file_path).fillna(0)
    except Exception as e:
        # Best-effort contract: report the problem and signal failure with None.
        print(f"Error processing {file_path}: {e}")
    return None

# Load the input CSV; the statements that follow all operate on this frame.
processed_df = process_csv(input_file_path)
# process_csv signals failure by printing the error and returning None.
# Stop here with an explicit message rather than letting the next statement
# fail with an unrelated "'NoneType' object is not subscriptable" TypeError.
if processed_df is None:
    raise RuntimeError(f"Could not load {input_file_path!r}")

# Strip thousands-separator commas and convert to a numeric dtype
def to_numeric_remove_commas(series):
    """Convert a Series of comma-grouped number strings to a numeric Series.

    Parameters
    ----------
    series : pandas.Series
        Either strings such as ``'1,234.5'`` or values that already have a
        numeric dtype.

    Returns
    -------
    pandas.Series
        The result of ``pd.to_numeric`` after every ``','`` has been removed
        from string values. A Series that is already numeric is passed
        through ``pd.to_numeric`` unchanged.

    Raises
    ------
    ValueError
        Propagated from ``pd.to_numeric`` when a cleaned string cannot be
        parsed as a number.
    """
    # A column whose values contain no commas may already have been parsed as
    # numbers; the ``.str`` accessor raises AttributeError on such a Series,
    # so skip the string clean-up for it.
    if pd.api.types.is_numeric_dtype(series):
        return pd.to_numeric(series)
    # regex=False: the comma is a literal character, independent of the
    # pandas-version-specific default for ``regex``.
    return pd.to_numeric(series.str.replace(',', '', regex=False))

# Convert the 'Date' column with pd.to_datetime so it can be sorted as dates.
processed_df['Date'] = pd.to_datetime(processed_df['Date'])

# The closing price may be stored under the header 'Price'. Either way the
# frame ends up with a numeric 'Close' column and no 'Price' column.
if 'Price' in processed_df.columns:
    processed_df['Close'] = to_numeric_remove_commas(processed_df.pop('Price'))
else:
    processed_df['Close'] = to_numeric_remove_commas(processed_df['Close'])

# The remaining price columns get the same comma-stripping conversion.
processed_df[['Open', 'High', 'Low']] = (
    processed_df[['Open', 'High', 'Low']].apply(to_numeric_remove_commas)
)

# Order the rows by date, earliest first.
processed_df = processed_df.sort_values("Date")

print(processed_df.head())

          Date      Open      High       Low    Vol. Change %     Close
721 2022-01-03  12934.42  12997.15  12905.53  30.07M    0.49%  12939.17
720 2022-01-04  12977.59  12979.76  12884.48  38.72M   -0.30%  12900.97
719 2022-01-05  12868.00  12906.37  12825.90  33.32M    0.04%  12906.37
718 2022-01-06  12754.51  12816.12  12728.44  40.33M   -0.88%  12792.28
717 2022-01-07  12754.41  12821.44  12715.49  42.71M    0.04%  12797.94


In [3]:
# Work on an independent Date/Close copy so processed_df is left untouched.
new_df = processed_df[['Date', 'Close']].copy()
closing_prices = new_df['Close']

# Natural log of each close divided by the close of the previous row; the
# first row has no previous row, so its value is NaN.
log_returns = np.log(closing_prices.div(closing_prices.shift(1)))
new_df = new_df.assign(
    log_returns=log_returns,
    log_prices=np.log(closing_prices),
)

# Show the first rows of both objects, separated by a blank line.
print("new_df", new_df.head(), "", sep="\n")
print("closing_prices", closing_prices.head(), sep="\n")

new_df
          Date     Close  log_returns  log_prices
721 2022-01-03  12939.17          NaN    9.468014
720 2022-01-04  12900.97    -0.002957    9.465058
719 2022-01-05  12906.37     0.000418    9.465476
718 2022-01-06  12792.28    -0.008879    9.456597
717 2022-01-07  12797.94     0.000442    9.457039

closing_prices
721    12939.17
720    12900.97
719    12906.37
718    12792.28
717    12797.94
Name: Close, dtype: float64


In [4]:
# Write new_df to the output CSV; index=False leaves the row index out of the file.
new_df.to_csv(path_or_buf=output_file_path, index=False)