In [13]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt
import seaborn as sns

from statsmodels.sandbox.regression.predstd import wls_prediction_std
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.linear_model import LinearRegression
from sklearn import linear_model
from scipy import stats
import warnings
warnings.filterwarnings("ignore")

In [14]:
# Fit log(PE) on the logged macro series for the Nifty Midcap quarterly file,
# then save the fitted values and the per-row prediction errors.
df = pd.read_csv("Macro Data_Q_NiftyMidcap.csv")
df['Date'] = pd.to_datetime(df['Date'])

# Natural-log copies of the raw series; the dict order fixes the order in
# which the new columns are appended to df.
log_sources = {
    'log_PE': 'PE',
    'log_RR': 'RR',
    'log_fed': 'Fed',
    'log_FX': 'FX',
    'log_gold': 'Gold',
    'log_oil': 'Oil',
    'log_lag_PE': 'Lag_PE',
    'log_lag_PB': 'Lag_PB',
}
for log_name, raw_name in log_sources.items():
    df[log_name] = np.log(df[raw_name])

# Regressors: every logged series except the target, plus an intercept column.
feature_cols = ['log_RR', 'log_fed', 'log_FX', 'log_gold', 'log_oil', 'log_lag_PE', 'log_lag_PB']
X = sm.add_constant(df[feature_cols])
y = df['log_PE']

# Ordinary least squares of log(PE) on the regressors.
model = sm.OLS(endog=y, exog=X).fit()
print(model.summary())

# In-sample predictions, with interval columns computed at alpha=0.05.
pred = model.get_prediction(X)
pred_summary = pred.summary_frame(alpha=0.05)

# Back-transform the predicted log(PE) to the PE scale.
pred_summary['Pred_PE'] = np.exp(pred_summary['mean'])
df['Pred_PE'] = pred_summary['Pred_PE']

# Signed gap between actual and fitted PE, and its size relative to fitted PE.
df['Pred_Diff'] = df['PE'] - df['Pred_PE']
df['Pred_Percent'] = (df['Pred_Diff'] / df['Pred_PE']).abs()

# Bucket the relative error into four equal-frequency bins labelled 1..4.
df['Pred_Percent_Quartile'] = pd.qcut(df['Pred_Percent'], q=4, labels=[1, 2, 3, 4])

print(df[['Pred_Percent', 'Pred_Percent_Quartile']])
print(pred_summary)

# Persist both tables; df_summary.csv is the file dataAnalysis() reads.
pred_summary.to_csv('pred_summary.csv')
df.to_csv('df_summary.csv')


import pandas as pd
import numpy as np
import time
from scipy import stats
import csv

def dataAnalysis(input_path="df_summary.csv", output_path="df_summary_Q_NiftyMidcap.csv"):
    """Label every dated row with a "B" / "S" / "H" signal and write the result to CSV.

    The input CSV must contain a ``Date`` column (used as the index) and a
    ``Pred_Diff`` column. Rows are processed in ascending ``Date`` order:

    * "B" - the current and the previous ``Pred_Diff`` are both negative and
      the last non-"H" signal emitted was not "B".
    * "S" - the current and the previous ``Pred_Diff`` are both positive and
      the last non-"H" signal emitted was not "S".
    * "H" - every other row after the first.

    An "H" never resets the remembered signal, so "B" and "S" can only
    alternate. The first row has no predecessor and keeps an empty signal.
    (Presumably B/S/H stand for Buy/Sell/Hold - confirm with the author.)

    Parameters
    ----------
    input_path : str
        CSV file to read. Defaults to the file this notebook writes.
    output_path : str
        CSV file to write; it is the input plus a ``Signal`` column.

    Returns
    -------
    None

    Raises
    ------
    KeyError
        If the input has no ``Pred_Diff`` column.
    """
    df = pd.read_csv(input_path, encoding="utf-8", index_col="Date")
    # Stable sort so rows that share a Date keep their file order.
    df = df.sort_index(ascending=True, kind="mergesort")

    diffs = df['Pred_Diff'].tolist()
    # Signals are built positionally: writing by index label would overwrite
    # every row that shares a Date. An empty frame simply yields an empty list.
    signals = [""] * len(diffs)
    prev_signal = "H"

    for pos in range(1, len(diffs)):
        prev_diff, diff = diffs[pos - 1], diffs[pos]
        if diff < 0 and prev_diff < 0 and prev_signal != "B":
            signals[pos] = prev_signal = "B"
        elif diff > 0 and prev_diff > 0 and prev_signal != "S":
            signals[pos] = prev_signal = "S"
        else:
            signals[pos] = "H"

    df['Signal'] = signals
    df.to_csv(output_path)
    

# Build the signal file: reads df_summary.csv (written earlier in this cell) and writes df_summary_Q_NiftyMidcap.csv.
dataAnalysis()

                            OLS Regression Results                            
Dep. Variable:                 log_PE   R-squared:                       0.684
Model:                            OLS   Adj. R-squared:                  0.645
Method:                 Least Squares   F-statistic:                     17.64
Date:                Thu, 19 Sep 2024   Prob (F-statistic):           3.31e-12
Time:                        00:10:46   Log-Likelihood:                -35.013
No. Observations:                  65   AIC:                             86.03
Df Residuals:                      57   BIC:                             103.4
Df Model:                           7                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -1.8675      2.072     -0.901      0.3

In [15]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt
import seaborn as sns

from statsmodels.sandbox.regression.predstd import wls_prediction_std
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.linear_model import LinearRegression
from sklearn import linear_model
from scipy import stats
import warnings
warnings.filterwarnings("ignore")

# Fit log(PE) on the logged macro series for the Nifty Smallcap quarterly file,
# then save the fitted values and the per-row prediction errors.
df = pd.read_csv("Macro Data_Q_NiftySmallcap.csv")
df['Date'] = pd.to_datetime(df['Date'])

# Natural-log copies of the raw series; the dict order fixes the order in
# which the new columns are appended to df.
log_sources = {
    'log_PE': 'PE',
    'log_RR': 'RR',
    'log_fed': 'Fed',
    'log_FX': 'FX',
    'log_gold': 'Gold',
    'log_oil': 'Oil',
    'log_lag_PE': 'Lag_PE',
    'log_lag_PB': 'Lag_PB',
}
for log_name, raw_name in log_sources.items():
    df[log_name] = np.log(df[raw_name])

# Regressors: every logged series except the target, plus an intercept column.
feature_cols = ['log_RR', 'log_fed', 'log_FX', 'log_gold', 'log_oil', 'log_lag_PE', 'log_lag_PB']
X = sm.add_constant(df[feature_cols])
y = df['log_PE']

# Ordinary least squares of log(PE) on the regressors.
model = sm.OLS(endog=y, exog=X).fit()
print(model.summary())

# In-sample predictions, with interval columns computed at alpha=0.05.
pred = model.get_prediction(X)
pred_summary = pred.summary_frame(alpha=0.05)

# Back-transform the predicted log(PE) to the PE scale.
pred_summary['Pred_PE'] = np.exp(pred_summary['mean'])
df['Pred_PE'] = pred_summary['Pred_PE']

# Signed gap between actual and fitted PE, and its size relative to fitted PE.
df['Pred_Diff'] = df['PE'] - df['Pred_PE']
df['Pred_Percent'] = (df['Pred_Diff'] / df['Pred_PE']).abs()

# Bucket the relative error into four equal-frequency bins labelled 1..4.
df['Pred_Percent_Quartile'] = pd.qcut(df['Pred_Percent'], q=4, labels=[1, 2, 3, 4])

print(df[['Pred_Percent', 'Pred_Percent_Quartile']])
print(pred_summary)

# Persist both tables; df_summary.csv is the file dataAnalysis() reads.
pred_summary.to_csv('pred_summary.csv')
df.to_csv('df_summary.csv')


import pandas as pd
import numpy as np
import time
from scipy import stats
import csv

def dataAnalysis(input_path="df_summary.csv", output_path="df_summary_Q_NiftySmallcap.csv"):
    """Label every dated row with a "B" / "S" / "H" signal and write the result to CSV.

    The input CSV must contain a ``Date`` column (used as the index) and a
    ``Pred_Diff`` column. Rows are processed in ascending ``Date`` order:

    * "B" - the current and the previous ``Pred_Diff`` are both negative and
      the last non-"H" signal emitted was not "B".
    * "S" - the current and the previous ``Pred_Diff`` are both positive and
      the last non-"H" signal emitted was not "S".
    * "H" - every other row after the first.

    An "H" never resets the remembered signal, so "B" and "S" can only
    alternate. The first row has no predecessor and keeps an empty signal.
    (Presumably B/S/H stand for Buy/Sell/Hold - confirm with the author.)

    Parameters
    ----------
    input_path : str
        CSV file to read. Defaults to the file this notebook writes.
    output_path : str
        CSV file to write; it is the input plus a ``Signal`` column.

    Returns
    -------
    None

    Raises
    ------
    KeyError
        If the input has no ``Pred_Diff`` column.
    """
    df = pd.read_csv(input_path, encoding="utf-8", index_col="Date")
    # Stable sort so rows that share a Date keep their file order.
    df = df.sort_index(ascending=True, kind="mergesort")

    diffs = df['Pred_Diff'].tolist()
    # Signals are built positionally: writing by index label would overwrite
    # every row that shares a Date. An empty frame simply yields an empty list.
    signals = [""] * len(diffs)
    prev_signal = "H"

    for pos in range(1, len(diffs)):
        prev_diff, diff = diffs[pos - 1], diffs[pos]
        if diff < 0 and prev_diff < 0 and prev_signal != "B":
            signals[pos] = prev_signal = "B"
        elif diff > 0 and prev_diff > 0 and prev_signal != "S":
            signals[pos] = prev_signal = "S"
        else:
            signals[pos] = "H"

    df['Signal'] = signals
    df.to_csv(output_path)
    

# Build the signal file: reads df_summary.csv (written earlier in this cell) and writes df_summary_Q_NiftySmallcap.csv.
dataAnalysis()

                            OLS Regression Results                            
Dep. Variable:                 log_PE   R-squared:                       0.486
Model:                            OLS   Adj. R-squared:                  0.366
Method:                 Least Squares   F-statistic:                     4.049
Date:                Thu, 19 Sep 2024   Prob (F-statistic):            0.00311
Time:                        00:10:47   Log-Likelihood:                -51.636
No. Observations:                  38   AIC:                             119.3
Df Residuals:                      30   BIC:                             132.4
Df Model:                           7                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -2.9309     16.986     -0.173      0.8

In [16]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt
import seaborn as sns

from statsmodels.sandbox.regression.predstd import wls_prediction_std
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.linear_model import LinearRegression
from sklearn import linear_model
from scipy import stats
import warnings
warnings.filterwarnings("ignore")

# Fit log(PE) on the logged macro series for the Nifty 50 quarterly file,
# then save the fitted values and the per-row prediction errors.
df = pd.read_csv("Macro Data_Q_Nifty50.csv")
df['Date'] = pd.to_datetime(df['Date'])

# Natural-log copies of the raw series; the dict order fixes the order in
# which the new columns are appended to df.
log_sources = {
    'log_PE': 'PE',
    'log_RR': 'RR',
    'log_fed': 'Fed',
    'log_FX': 'FX',
    'log_gold': 'Gold',
    'log_oil': 'Oil',
    'log_lag_PE': 'Lag_PE',
    'log_lag_PB': 'Lag_PB',
}
for log_name, raw_name in log_sources.items():
    df[log_name] = np.log(df[raw_name])

# Regressors: every logged series except the target, plus an intercept column.
feature_cols = ['log_RR', 'log_fed', 'log_FX', 'log_gold', 'log_oil', 'log_lag_PE', 'log_lag_PB']
X = sm.add_constant(df[feature_cols])
y = df['log_PE']

# Ordinary least squares of log(PE) on the regressors.
model = sm.OLS(endog=y, exog=X).fit()
print(model.summary())

# In-sample predictions, with interval columns computed at alpha=0.05.
pred = model.get_prediction(X)
pred_summary = pred.summary_frame(alpha=0.05)

# Back-transform the predicted log(PE) to the PE scale.
pred_summary['Pred_PE'] = np.exp(pred_summary['mean'])
df['Pred_PE'] = pred_summary['Pred_PE']

# Signed gap between actual and fitted PE, and its size relative to fitted PE.
df['Pred_Diff'] = df['PE'] - df['Pred_PE']
df['Pred_Percent'] = (df['Pred_Diff'] / df['Pred_PE']).abs()

# Bucket the relative error into four equal-frequency bins labelled 1..4.
df['Pred_Percent_Quartile'] = pd.qcut(df['Pred_Percent'], q=4, labels=[1, 2, 3, 4])

print(df[['Pred_Percent', 'Pred_Percent_Quartile']])
print(pred_summary)

# Persist both tables; df_summary.csv is the file dataAnalysis() reads.
pred_summary.to_csv('pred_summary.csv')
df.to_csv('df_summary.csv')


import pandas as pd
import numpy as np
import time
from scipy import stats
import csv

def dataAnalysis(input_path="df_summary.csv", output_path="df_summary_Q_Nifty50.csv"):
    """Label every dated row with a "B" / "S" / "H" signal and write the result to CSV.

    The input CSV must contain a ``Date`` column (used as the index) and a
    ``Pred_Diff`` column. Rows are processed in ascending ``Date`` order:

    * "B" - the current and the previous ``Pred_Diff`` are both negative and
      the last non-"H" signal emitted was not "B".
    * "S" - the current and the previous ``Pred_Diff`` are both positive and
      the last non-"H" signal emitted was not "S".
    * "H" - every other row after the first.

    An "H" never resets the remembered signal, so "B" and "S" can only
    alternate. The first row has no predecessor and keeps an empty signal.
    (Presumably B/S/H stand for Buy/Sell/Hold - confirm with the author.)

    Parameters
    ----------
    input_path : str
        CSV file to read. Defaults to the file this notebook writes.
    output_path : str
        CSV file to write; it is the input plus a ``Signal`` column.

    Returns
    -------
    None

    Raises
    ------
    KeyError
        If the input has no ``Pred_Diff`` column.
    """
    df = pd.read_csv(input_path, encoding="utf-8", index_col="Date")
    # Stable sort so rows that share a Date keep their file order.
    df = df.sort_index(ascending=True, kind="mergesort")

    diffs = df['Pred_Diff'].tolist()
    # Signals are built positionally: writing by index label would overwrite
    # every row that shares a Date. An empty frame simply yields an empty list.
    signals = [""] * len(diffs)
    prev_signal = "H"

    for pos in range(1, len(diffs)):
        prev_diff, diff = diffs[pos - 1], diffs[pos]
        if diff < 0 and prev_diff < 0 and prev_signal != "B":
            signals[pos] = prev_signal = "B"
        elif diff > 0 and prev_diff > 0 and prev_signal != "S":
            signals[pos] = prev_signal = "S"
        else:
            signals[pos] = "H"

    df['Signal'] = signals
    df.to_csv(output_path)
    

# Build the signal file: reads df_summary.csv (written earlier in this cell) and writes df_summary_Q_Nifty50.csv.
dataAnalysis()

                            OLS Regression Results                            
Dep. Variable:                 log_PE   R-squared:                       0.798
Model:                            OLS   Adj. R-squared:                  0.774
Method:                 Least Squares   F-statistic:                     33.26
Date:                Thu, 19 Sep 2024   Prob (F-statistic):           3.15e-18
Time:                        00:10:47   Log-Likelihood:                 57.945
No. Observations:                  67   AIC:                            -99.89
Df Residuals:                      59   BIC:                            -82.25
Df Model:                           7                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          1.0337      0.506      2.043      0.0

In [17]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt
import seaborn as sns

from statsmodels.sandbox.regression.predstd import wls_prediction_std
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.linear_model import LinearRegression
from sklearn import linear_model
from scipy import stats
import warnings
warnings.filterwarnings("ignore")

# Fit log(PE) on the logged macro series for the Nifty JR quarterly file,
# then save the fitted values and the per-row prediction errors.
df = pd.read_csv("Macro Data_Q_NiftyJR.csv")
df['Date'] = pd.to_datetime(df['Date'])

# Natural-log copies of the raw series; the dict order fixes the order in
# which the new columns are appended to df.
log_sources = {
    'log_PE': 'PE',
    'log_RR': 'RR',
    'log_fed': 'Fed',
    'log_FX': 'FX',
    'log_gold': 'Gold',
    'log_oil': 'Oil',
    'log_lag_PE': 'Lag_PE',
    'log_lag_PB': 'Lag_PB',
}
for log_name, raw_name in log_sources.items():
    df[log_name] = np.log(df[raw_name])

# Regressors: every logged series except the target, plus an intercept column.
feature_cols = ['log_RR', 'log_fed', 'log_FX', 'log_gold', 'log_oil', 'log_lag_PE', 'log_lag_PB']
X = sm.add_constant(df[feature_cols])
y = df['log_PE']

# Ordinary least squares of log(PE) on the regressors.
model = sm.OLS(endog=y, exog=X).fit()
print(model.summary())

# In-sample predictions, with interval columns computed at alpha=0.05.
pred = model.get_prediction(X)
pred_summary = pred.summary_frame(alpha=0.05)

# Back-transform the predicted log(PE) to the PE scale.
pred_summary['Pred_PE'] = np.exp(pred_summary['mean'])
df['Pred_PE'] = pred_summary['Pred_PE']

# Signed gap between actual and fitted PE, and its size relative to fitted PE.
df['Pred_Diff'] = df['PE'] - df['Pred_PE']
df['Pred_Percent'] = (df['Pred_Diff'] / df['Pred_PE']).abs()

# Bucket the relative error into four equal-frequency bins labelled 1..4.
df['Pred_Percent_Quartile'] = pd.qcut(df['Pred_Percent'], q=4, labels=[1, 2, 3, 4])

print(df[['Pred_Percent', 'Pred_Percent_Quartile']])
print(pred_summary)

# Persist both tables; df_summary.csv is the file dataAnalysis() reads.
pred_summary.to_csv('pred_summary.csv')
df.to_csv('df_summary.csv')


import pandas as pd
import numpy as np
import time
from scipy import stats
import csv

def dataAnalysis(input_path="df_summary.csv", output_path="df_summary_Q_NiftyJR.csv"):
    """Label every dated row with a "B" / "S" / "H" signal and write the result to CSV.

    The input CSV must contain a ``Date`` column (used as the index) and a
    ``Pred_Diff`` column. Rows are processed in ascending ``Date`` order:

    * "B" - the current and the previous ``Pred_Diff`` are both negative and
      the last non-"H" signal emitted was not "B".
    * "S" - the current and the previous ``Pred_Diff`` are both positive and
      the last non-"H" signal emitted was not "S".
    * "H" - every other row after the first.

    An "H" never resets the remembered signal, so "B" and "S" can only
    alternate. The first row has no predecessor and keeps an empty signal.
    (Presumably B/S/H stand for Buy/Sell/Hold - confirm with the author.)

    Parameters
    ----------
    input_path : str
        CSV file to read. Defaults to the file this notebook writes.
    output_path : str
        CSV file to write; it is the input plus a ``Signal`` column.

    Returns
    -------
    None

    Raises
    ------
    KeyError
        If the input has no ``Pred_Diff`` column.
    """
    df = pd.read_csv(input_path, encoding="utf-8", index_col="Date")
    # Stable sort so rows that share a Date keep their file order.
    df = df.sort_index(ascending=True, kind="mergesort")

    diffs = df['Pred_Diff'].tolist()
    # Signals are built positionally: writing by index label would overwrite
    # every row that shares a Date. An empty frame simply yields an empty list.
    signals = [""] * len(diffs)
    prev_signal = "H"

    for pos in range(1, len(diffs)):
        prev_diff, diff = diffs[pos - 1], diffs[pos]
        if diff < 0 and prev_diff < 0 and prev_signal != "B":
            signals[pos] = prev_signal = "B"
        elif diff > 0 and prev_diff > 0 and prev_signal != "S":
            signals[pos] = prev_signal = "S"
        else:
            signals[pos] = "H"

    df['Signal'] = signals
    df.to_csv(output_path)
    

# Build the signal file: reads df_summary.csv (written earlier in this cell) and writes df_summary_Q_NiftyJR.csv.
dataAnalysis()

                            OLS Regression Results                            
Dep. Variable:                 log_PE   R-squared:                       0.757
Model:                            OLS   Adj. R-squared:                  0.728
Method:                 Least Squares   F-statistic:                     25.86
Date:                Thu, 19 Sep 2024   Prob (F-statistic):           1.18e-15
Time:                        00:10:47   Log-Likelihood:                 19.336
No. Observations:                  66   AIC:                            -22.67
Df Residuals:                      58   BIC:                            -5.156
Df Model:                           7                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.3850      0.820      0.470      0.6