In [1]:
import pandas as pd
import os

# Build per-label monthly medians of trade volume, share volume and turnover
# from the per-share CSV files found in `directory_path`.

# Directory containing the CSV files; each file name (minus its extension) becomes the row label.
directory_path = "../DATASETS/CSE_DATA/SHARE_CHANGE/"

# Label whose rows are excluded before aggregation.
excluded_label = 'SWAD.N0000'

# os.listdir returns entries in arbitrary order, so sort for a reproducible load order.
filenames = sorted(
    os.path.join(directory_path, file)
    for file in os.listdir(directory_path)
    if file.endswith('.csv')
)
if not filenames:
    # Without this guard pd.concat fails with an opaque "No objects to concatenate" error.
    raise FileNotFoundError(f"No CSV files found in {directory_path!r}")

# Read every CSV and tag its rows with the label derived from the file name.
dataframes = []
for file in filenames:
    df = pd.read_csv(file)
    # splitext removes only the trailing extension; str.replace(".csv", "") would
    # also strip any ".csv" that happens to occur earlier in the file name.
    label = os.path.splitext(os.path.basename(file))[0]
    df['Label'] = label
    dataframes.append(df)

# Concatenate all DataFrames into a single DataFrame
combined_df = pd.concat(dataframes, ignore_index=True)

# Drop the excluded label. The .copy() makes the filtered frame independent of the
# original, so the column assignments below cannot raise SettingWithCopyWarning.
combined_df = combined_df[combined_df['Label'] != excluded_label].copy()

# Parse 'Trade Date' (MM/DD/YY) once and derive every date column from the parsed values.
trade_dates = pd.to_datetime(combined_df['Trade Date'], format='%m/%d/%y')
combined_df['Trade Date'] = trade_dates.dt.strftime('%d/%m/%Y')  # stored as DD/MM/YYYY text
# Day/Month/Year are zero-padded strings, so their lexicographic order is chronological.
combined_df['Day'] = trade_dates.dt.strftime('%d')
combined_df['Month'] = trade_dates.dt.strftime('%m')
combined_df['Year'] = trade_dates.dt.strftime('%Y')

# Keep only the grouping keys and the columns to aggregate
selected_columns = ['Label', 'Year', 'Month', 'TradeVolume', 'ShareVolume', 'Turnover (Rs.)']
result_df = combined_df[selected_columns]

# Group by 'Label', 'Year', and 'Month' and take the median of each value column
agg_dict = {
    'TradeVolume': 'median',
    'ShareVolume': 'median',
    'Turnover (Rs.)': 'median'
}
final_result = result_df.groupby(['Label', 'Year', 'Month']).agg(agg_dict).reset_index()

# Sort chronologically by 'Year' and 'Month', then by 'Label' within each month
final_result = final_result.sort_values(by=['Year', 'Month', 'Label'])
final_result

Unnamed: 0,Label,Year,Month,TradeVolume,ShareVolume,Turnover (Rs.)
19629,JKH.N0000,1986,08,254.0,665786.0,88012358.50
35686,SUN.N0000,1990,03,0.0,0.0,0.00
16090,GREG.N0000,1990,04,0.0,0.0,0.00
32359,SELI.N0000,1991,01,0.0,0.0,0.00
33994,SHAL.N0000,1991,02,0.0,0.0,0.00
...,...,...,...,...,...,...
40074,VPEL.N0000,2023,10,29.5,70013.0,481824.00
40078,WAPO.N0000,2023,10,50.5,23546.5,762070.60
40388,WATA.N0000,2023,10,52.0,22318.0,1655065.80
40419,WIND.N0000,2023,10,20.5,64989.5,1202721.35


In [2]:
# Write the aggregated result to a CSV file in the working directory.
# index=False leaves the DataFrame index out of the file.
output_csv = 'final_stock_value_with_internal_factors.csv'
final_result.to_csv(output_csv, index=False)