In [None]:
pip install linearmodels

Collecting linearmodels
  Downloading linearmodels-6.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.9 kB)
Collecting mypy-extensions>=0.4 (from linearmodels)
  Downloading mypy_extensions-1.0.0-py3-none-any.whl.metadata (1.1 kB)
Collecting pyhdfe>=0.1 (from linearmodels)
  Downloading pyhdfe-0.2.0-py3-none-any.whl.metadata (4.0 kB)
Collecting formulaic>=1.0.0 (from linearmodels)
  Downloading formulaic-1.0.2-py3-none-any.whl.metadata (6.8 kB)
Collecting setuptools-scm<9.0.0,>=8.0.0 (from setuptools-scm[toml]<9.0.0,>=8.0.0->linearmodels)
  Downloading setuptools_scm-8.1.0-py3-none-any.whl.metadata (6.6 kB)
Collecting interface-meta>=1.2.0 (from formulaic>=1.0.0->linearmodels)
  Downloading interface_meta-1.3.0-py3-none-any.whl.metadata (6.7 kB)
Downloading linearmodels-6.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.5/1.5 MB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m
[?25hD

In [None]:
import pandas as pd
import statsmodels.api as sm
from google.colab import drive
from linearmodels import PanelOLS, RandomEffects, FirstDifferenceOLS
from linearmodels.panel import compare

Mounting Google Drive

In [None]:
# Mount Google Drive at /content/drive so the workbook can be read by path.
drive.mount("/content/drive")

Mounted at /content/drive


Reading the data from the Excel file

In [None]:
# Load the 'mainpanel' sheet of the project workbook into the raw frame.
dfr = pd.read_excel(
    r'/content/drive/My Drive/CMIE_PROJECT/panel.xlsx',
    sheet_name='mainpanel',
)

Dropping irrelevant variables

In [None]:
# Variables that are not used in any of the regressions below.
unused_columns = [
    'Size',
    'Net fixed assets',
    'IT/ITES & other professional services',
    'Additions to computers and IT systems during the year',
    'Gross computers and IT systems',
    'Total assets',
    'Export / Sales (%)',
    'Gross software',
    'Net profit margin',
    'Net cash inflow or (outflow) from investing activities',
    'Total forex earnings / Total income (%)',
    'Raw material imports / Raw material purchases (%)',
]
dfr = dfr.drop(columns=unused_columns)

In [None]:
# Work on a copy so the trimmed raw frame `dfr` stays available unchanged.
df = dfr.copy()

Filling missing values by linear interpolation within each company

In [None]:
def fill_nan_linearly(column):
    """Fill missing values in one column by linear interpolation.

    Gaps between valid observations are interpolated linearly.
    ``limit_direction='both'`` also fills leading and trailing gaps.

    Parameters
    ----------
    column : pandas.Series
        A single column of the panel.

    Returns
    -------
    pandas.Series
        The interpolated column. Text columns (object or string dtype, e.g.
        company names) are returned unchanged, because linear interpolation is
        undefined for them and recent pandas versions warn or raise on it.
    """
    dtype = column.dtype
    if pd.api.types.is_object_dtype(dtype) or pd.api.types.is_string_dtype(dtype):
        return column
    return column.interpolate(method='linear', limit_direction='both')

# Interpolate every numeric variable separately within each company.
#
# The numeric columns are selected explicitly instead of letting
# groupby.apply run on the whole frame: applying to the grouping column is
# deprecated in pandas 2.2, and once it is excluded the 'Companies' column
# would be lost when the group keys are dropped, breaking the later set_index.
#
# Rows are ordered by company (stable sort, so the within-company order is
# kept) and rows without a company are dropped, which matches the layout the
# grouped apply used to produce.
filled_df = (
    df.dropna(subset=['Companies'])
    .sort_values('Companies', kind='stable')
    .reset_index(drop=True)
)
numeric_cols = (
    filled_df.select_dtypes(include='number')
    .columns.drop('Companies', errors='ignore')
)
if len(numeric_cols) > 0:
    # group_keys=False keeps the original row labels, so the assignment
    # below aligns each filled value with its own row.
    filled_df[numeric_cols] = (
        filled_df.groupby('Companies', group_keys=False)[numeric_cols]
        .apply(lambda group: group.apply(fill_nan_linearly, axis=0))
    )

In [None]:
# Continue with a copy of the interpolated panel.
df = filled_df.copy()

Converting Year to categorical

In [None]:
# Capture Year as a categorical before it is moved into the index below.
year = pd.Categorical(df["Year"])

In [None]:
# Index the panel by (company, year); the panel estimators below read the
# entity and time dimensions from this two-level index.
df = df.set_index(keys=["Companies", "Year"])

In [None]:
# Re-attach Year as a categorical column so it can also be used as a regressor.
df = df.assign(Year=year)

In [None]:
# Replace every space in the column names with an underscore (literal match).
df.columns = df.columns.str.replace(" ", "_", regex=False)

# **Fixed Effect**

**Time Invariant** (entity effects; `Year` enters as a categorical regressor)

In [None]:
# Regressors for the entity-effects model. 'Year' is the categorical column
# added above, so it is part of the design matrix here.
exo = [
    'Computer_IT,_net_addition_in_year',
    'Net_computers_and_IT_systems',
    'Net_plant_&_machinery,_computers_and_electrical_installations',
    'Net_software',
    'Profit_after_tax',
    'Total_expenses',
    'Year',
]
# Add an intercept column to the selected regressors.
exog = sm.add_constant(df[exo])

In [None]:
# Fixed-effects model of total income with entity (company) effects only.
modfeti = PanelOLS(
    dependent=df["Total_income"],
    exog=exog,
    entity_effects=True,
)

In [None]:
# Estimate the entity-effects model and print its text summary.
fe_res_ti = modfeti.fit()
print(fe_res_ti.summary)

                          PanelOLS Estimation Summary                           
Dep. Variable:           Total_income   R-squared:                        0.9885
Estimator:                   PanelOLS   R-squared (Between):              0.9997
No. Observations:               56126   R-squared (Within):               0.9885
Date:                Thu, Jul 25 2024   R-squared (Overall):              0.9978
Time:                        12:38:45   Log-likelihood                -5.121e+05
Cov. Estimator:            Unadjusted                                           
                                        F-statistic:                   2.366e+05
Entities:                        4009   P-value                           0.0000
Avg Obs:                       14.000   Distribution:                F(19,52098)
Min Obs:                       14.000                                           
Max Obs:                       14.000   F-statistic (robust):          2.366e+05
                            

**Time Effect** (entity and time effects)

In [None]:
# Regressors for the two-way model. 'Year' is left out of the list because
# the model is built with time_effects=True.
exo = [
    'Computer_IT,_net_addition_in_year',
    'Net_computers_and_IT_systems',
    'Net_plant_&_machinery,_computers_and_electrical_installations',
    'Net_software',
    'Profit_after_tax',
    'Total_expenses',
]
# Add an intercept column to the selected regressors.
exog = sm.add_constant(df[exo])

In [None]:
# Fixed-effects model of total income with both entity and time effects.
modfete = PanelOLS(
    dependent=df["Total_income"],
    exog=exog,
    entity_effects=True,
    time_effects=True,
)

In [None]:
# Estimate the two-way fixed-effects model and print its text summary.
fe_res_te = modfete.fit()
print(fe_res_te.summary)

                          PanelOLS Estimation Summary                           
Dep. Variable:           Total_income   R-squared:                        0.9881
Estimator:                   PanelOLS   R-squared (Between):              0.9997
No. Observations:               56126   R-squared (Within):               0.9885
Date:                Thu, Jul 25 2024   R-squared (Overall):              0.9978
Time:                        12:38:46   Log-likelihood                -5.121e+05
Cov. Estimator:            Unadjusted                                           
                                        F-statistic:                   7.223e+05
Entities:                        4009   P-value                           0.0000
Avg Obs:                       14.000   Distribution:                 F(6,52098)
Min Obs:                       14.000                                           
Max Obs:                       14.000   F-statistic (robust):          7.223e+05
                            

# **Random Effect**

In [None]:
# Regressors for the random-effects model, including the categorical 'Year'.
exo = [
    'Computer_IT,_net_addition_in_year',
    'Net_computers_and_IT_systems',
    'Net_plant_&_machinery,_computers_and_electrical_installations',
    'Net_software',
    'Profit_after_tax',
    'Total_expenses',
    'Year',
]
# Add an intercept column to the selected regressors.
exog = sm.add_constant(df[exo])

In [None]:
# Random-effects model of total income on the same regressors; fit it and
# print the text summary.
mod = RandomEffects(dependent=df["Total_income"], exog=exog)
re_res = mod.fit()
print(re_res.summary)

                        RandomEffects Estimation Summary                        
Dep. Variable:           Total_income   R-squared:                        0.9973
Estimator:              RandomEffects   R-squared (Between):              0.9997
No. Observations:               56126   R-squared (Within):               0.9885
Date:                Thu, Jul 25 2024   R-squared (Overall):              0.9979
Time:                        12:38:47   Log-likelihood                -5.144e+05
Cov. Estimator:            Unadjusted                                           
                                        F-statistic:                   1.095e+06
Entities:                        4009   P-value                           0.0000
Avg Obs:                       14.000   Distribution:                F(19,56106)
Min Obs:                       14.000                                           
Max Obs:                       14.000   F-statistic (robust):          1.095e+06
                            

# **First Difference OLS**

In [None]:
# Regressors for the first-difference model. No intercept column and no
# 'Year' column are included here.
exo = [
    'Computer_IT,_net_addition_in_year',
    'Net_computers_and_IT_systems',
    'Net_plant_&_machinery,_computers_and_electrical_installations',
    'Net_software',
    'Profit_after_tax',
    'Total_expenses',
]
exog = df[exo]

In [None]:
# First-difference model of total income on the regressors selected above.
mod = FirstDifferenceOLS(dependent=df["Total_income"], exog=exog)

In [None]:
# Estimate the first-difference model and print its text summary.
fd_res = mod.fit()
print(fd_res.summary)

                     FirstDifferenceOLS Estimation Summary                      
Dep. Variable:           Total_income   R-squared:                        0.9234
Estimator:         FirstDifferenceOLS   R-squared (Between):              0.9994
No. Observations:               52117   R-squared (Within):               0.9879
Date:                Thu, Jul 25 2024   R-squared (Overall):              0.9976
Time:                        12:38:48   Log-likelihood                -4.944e+05
Cov. Estimator:            Unadjusted                                           
                                        F-statistic:                   1.048e+05
Entities:                        4009   P-value                           0.0000
Avg Obs:                       14.000   Distribution:                 F(6,52111)
Min Obs:                       14.000                                           
Max Obs:                       14.000   F-statistic (robust):          1.048e+05
                            

In [None]:
# Side-by-side comparison table of the four fitted models.
fitted_models = {
    "FE_TI": fe_res_ti,
    "FE_TE": fe_res_te,
    "RE": re_res,
    "FD": fd_res,
}
print(compare(fitted_models))

                                                              Model Comparison                                                              
                                                                             FE_TI            FE_TE                RE                     FD
--------------------------------------------------------------------------------------------------------------------------------------------
Dep. Variable                                                         Total_income     Total_income      Total_income           Total_income
Estimator                                                                 PanelOLS         PanelOLS     RandomEffects     FirstDifferenceOLS
No. Observations                                                             56126            56126             56126                  52117
Cov. Est.                                                               Unadjusted       Unadjusted        Unadjusted             Unadjusted
R-squared    