In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns


In [None]:
# Load the BEML and GLAXO datasets

# Read both CSV files (paths are relative to the working directory) into DataFrames.
beml_df, glaxo_df = (pd.read_csv(csv_name) for csv_name in ("BEML.csv", "GLAXO.csv"))


In [None]:
# Structural summary (columns, dtypes, non-null counts) of each dataset.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would append a stray "None" line.
print("BEML Info:")
beml_df.info()
print("\nGLAXO Info:")
glaxo_df.info()

In [None]:
# Per-column count of missing values in each dataset.
for _heading, _frame in (
    ("Missing in BEML:\n", beml_df),
    ("Missing in GLAXO:\n", glaxo_df),
):
    print(_heading, _frame.isna().sum())

In [None]:
# Missing values per column as a percentage of the number of rows.
missing_percent_beml = beml_df.isna().sum().div(len(beml_df)).mul(100)
missing_percent_glaxo = glaxo_df.isna().sum().div(len(glaxo_df)).mul(100)

print("\n% Missing (BEML):\n", missing_percent_beml)
print("\n% Missing (GLAXO):\n", missing_percent_glaxo)

In [None]:
# Keep only the columns whose missing-value percentage is below the threshold.
threshold = 40  # percent of rows allowed to be missing before a column is dropped

# .copy() gives each filtered frame its own data. Later cells assign to and
# sort these frames in place; without the copy, pandas versions that do not use
# copy-on-write can emit SettingWithCopyWarning when a column was dropped here.
beml_df = beml_df.loc[:, missing_percent_beml < threshold].copy()
glaxo_df = glaxo_df.loc[:, missing_percent_glaxo < threshold].copy()

In [None]:
# Number of rows that are exact duplicates of an earlier row, per dataset.
for _title, _frame in (
    ("Duplicates in BEML:", beml_df),
    ("Duplicates in GLAXO:", glaxo_df),
):
    print(_title, _frame.duplicated().sum())


In [None]:
# Parse the 'Date' column of both frames into pandas datetime values.
for _frame in (beml_df, glaxo_df):
    _frame['Date'] = pd.to_datetime(_frame['Date'])


In [None]:
# Sort both frames by date (ascending), modifying them in place.
for _frame in (beml_df, glaxo_df):
    _frame.sort_values(by='Date', inplace=True)


In [None]:
# Pull out the 'Close' column of each frame for the price analysis below.
beml_close, glaxo_close = beml_df['Close'], glaxo_df['Close']


In [None]:
# 10. Calculate Measures of Dispersion
def calculate_dispersion(data, label):
    """Print central-tendency and dispersion statistics for a numeric sample.

    Missing values are removed once, up front, so every statistic is computed
    on the same observations. Without this, a pandas Series containing NaN
    gives a NaN-skipping mean/variance/std/range (NumPy delegates those to the
    Series methods) but a NaN median and IQR.

    Parameters
    ----------
    data : array-like
        One-dimensional collection of numeric values (list, ndarray, Series).
    label : str
        Name shown in the report header.

    Returns
    -------
    None
        The statistics are printed, not returned.

    Raises
    ------
    ValueError
        If ``data`` contains no non-missing observations.
    """
    values = pd.Series(data).dropna().to_numpy()
    if values.size == 0:
        raise ValueError(f"No non-missing observations available for {label}")

    # Mean and standard deviation feed several lines below; compute them once.
    mean = np.mean(values)
    std = np.std(values)  # population standard deviation (NumPy default ddof=0)
    q1, q3 = np.percentile(values, [25, 75])

    print(f"\n--- Dispersion Measures for {label} ---")
    print("Mean:", mean)
    print("Median:", np.median(values))
    print("Range:", np.max(values) - np.min(values))
    print("IQR:", q3 - q1)
    print("Variance:", np.var(values))
    print("Std Deviation:", std)
    print("Mean Absolute Deviation:", np.mean(np.abs(values - mean)))
    print("Coefficient of Variation:", std / mean)

# Print the dispersion report for each stock's closing prices.
for _prices, _name in ((beml_close, "BEML"), (glaxo_close, "GLAXO")):
    calculate_dispersion(_prices, _name)


In [None]:
# Boxplot of closing prices, one box per stock.
#
# The two series are placed in a wide frame with explicit column names so that
# seaborn labels each box itself. Both series are named 'Close'; passing them
# as a bare list relies on seaborn keeping same-named vectors apart (newer
# seaborn versions appear to key list inputs by Series name - TODO confirm) and
# on hand-written tick labels matching the box order.
# The index is reset because the two series may differ in length; any padding
# NaN values are ignored by the boxplot.
close_prices_wide = pd.DataFrame({
    'BEML': beml_close.reset_index(drop=True),
    'GLAXO': glaxo_close.reset_index(drop=True),
})

fig, ax = plt.subplots(figsize=(10, 4))
sns.boxplot(data=close_prices_wide, orient="v", ax=ax)
ax.set_title("Boxplot - Closing Prices")
ax.set_ylabel("Close Price")
plt.show()

In [None]:
# Overlaid histograms of the closing prices of both stocks.
# Axis labels are set so the figure can be read on its own, matching the
# labelled time-series plot later in the notebook.
fig, ax = plt.subplots(figsize=(10, 4))
ax.hist(beml_close, alpha=0.5, label='BEML')
ax.hist(glaxo_close, alpha=0.5, label='GLAXO')
ax.set_title("Histogram of Closing Prices")
ax.set_xlabel("Close Price")
ax.set_ylabel("Frequency")
ax.legend()
plt.show()

In [None]:
# Line chart of each stock's closing price against its own date column.
fig, ax = plt.subplots(figsize=(12, 5))
for _frame, _prices, _name in (
    (beml_df, beml_close, 'BEML'),
    (glaxo_df, glaxo_close, 'GLAXO'),
):
    ax.plot(_frame['Date'], _prices, label=_name)
ax.set_title("Stock Prices Over Time")
ax.set_xlabel("Date")
ax.set_ylabel("Close Price")
ax.legend()
plt.show()

In [None]:
# Log returns: natural log of each closing price divided by the previous one.
# The first entry has no previous price, so it is NaN.
beml_log_ret = np.log(beml_close.div(beml_close.shift()))
glaxo_log_ret = np.log(glaxo_close.div(glaxo_close.shift()))

# Compare return volatility via the standard deviation of the log returns.
for _heading, _returns in (
    ("\nBEML Log Return STD:", beml_log_ret),
    ("GLAXO Log Return STD:", glaxo_log_ret),
):
    print(_heading, _returns.std())

In [None]:
# Correlation matrix of the two closing-price series.
#
# The series are joined on 'Date' rather than combined by row-index label:
# building the frame straight from beml_close / glaxo_close aligns rows by
# their original CSV row number, which pairs prices from different days
# whenever the two files differ in ordering or date coverage.
# The inner join keeps only dates present in both datasets.
df_combined = (
    beml_df[['Date', 'Close']]
    .merge(
        glaxo_df[['Date', 'Close']],
        on='Date',
        how='inner',
        suffixes=('_BEML', '_GLAXO'),
    )
    .rename(columns={'Close_BEML': 'BEML', 'Close_GLAXO': 'GLAXO'})
    .set_index('Date')
)
print("\nCorrelation Matrix:\n", df_combined.corr())

In [None]:
# Risk decision based on the coefficient of variation (CV = std / mean).
# The CVs are computed here, with the same formula calculate_dispersion prints,
# because cv_beml / cv_glaxo are not defined anywhere else in the notebook and
# the comparison would otherwise fail with a NameError.
cv_beml = np.std(beml_close) / np.mean(beml_close)
cv_glaxo = np.std(glaxo_close) / np.mean(glaxo_close)

# A lower CV means less dispersion relative to the mean price.
if cv_beml < cv_glaxo:
    print("\n✅ BEML is less risky based on Coefficient of Variation.")
else:
    print("\n✅ GLAXO is less risky based on Coefficient of Variation.")

In [None]:
# Q2: feature engineering, gain calculation and time plot for GLAXO.

# Extract
# Uses the same relative path as the load at the top of the notebook, so the
# notebook does not depend on an absolute, machine-specific directory.
df_glaxo = pd.read_csv('GLAXO.csv')
df_glaxo['Date'] = pd.to_datetime(df_glaxo['Date'])

# Every feature below compares a row with the rows before it, so the frame
# must be in chronological order first.
df_glaxo = df_glaxo.sort_values('Date').reset_index(drop=True)

# Feature Engineering
df_glaxo['Year'] = df_glaxo['Date'].dt.year
df_glaxo['Month'] = df_glaxo['Date'].dt.month
df_glaxo['Day'] = df_glaxo['Date'].dt.day
df_glaxo['Daily_Return'] = df_glaxo['Close'].pct_change()
# Rolling statistics over a window of 7 rows (not 7 calendar days).
df_glaxo['Rolling_Mean_7'] = df_glaxo['Close'].rolling(window=7).mean()
df_glaxo['Rolling_Std_7'] = df_glaxo['Close'].rolling(window=7).std()

# Gain Calculation
# Gain is the row-over-row change in Close; Cumulative_Gain is its running sum.
df_glaxo['Gain'] = df_glaxo['Close'].diff()
df_glaxo['Cumulative_Gain'] = df_glaxo['Gain'].cumsum()

# Time Plot
plt.figure(figsize=(12,6))
plt.plot(df_glaxo['Date'], df_glaxo['Close'], label='GLAXO Close Price', color='green')
plt.xlabel('Date')
plt.ylabel('Close Price')
plt.title('GLAXO Stock Close Price Over Time')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()
