In [1]:
import os
import pandas as pd
import numpy as np
import statsmodels.api as sm
import statsmodels.formula.api as smf
import seaborn as sns
import matplotlib.pyplot as plt
from statsmodels.miscmodels.ordinal_model import OrderedModel


In [2]:
# Directory that holds the input data. It defaults to the original location but
# can be overridden with the NSSO_DATA_DIR environment variable, so the notebook
# also runs on machines where that drive/path does not exist.
DATA_DIR = os.environ.get('NSSO_DATA_DIR', 'D:\\Assignments_SCMA632\\Data')
# Later cells open files by bare name, so the working directory is switched here.
os.chdir(DATA_DIR)
print(os.getcwd())


D:\Assignments_SCMA632\Data


In [4]:
# Read NSSO68.csv from the current working directory into `df`.
# low_memory=False makes pandas parse the file in one piece instead of in
# chunks, which avoids mixed-dtype inference within a column.
csv_name = 'NSSO68.csv'
df = pd.read_csv(csv_name, low_memory=False)

In [5]:
# Keep only the rows whose state_1 value is 'Pun', restricted to the six
# quantity columns used by the following cells.
# .loc performs the row and column selection in a single step (no chained
# indexing), and .copy() makes subset_df an independent frame, so adding a
# column to it later does not raise SettingWithCopyWarning.
quantity_cols = ['eggsno_q', 'fishprawn_q', 'goatmeat_q', 'beef_q', 'chicken_q', 'pork_q']
subset_df = df.loc[df['state_1'] == 'Pun', quantity_cols].copy()
print(subset_df)

       eggsno_q  fishprawn_q  goatmeat_q  beef_q  chicken_q  pork_q
36170       0.0          0.0         0.0     0.0   0.000000     0.0
36171       0.0          0.0         0.0     0.0   0.000000     0.0
36172       0.0          0.0         0.0     0.0   0.000000     0.0
36173       0.0          0.0         0.0     0.0   0.000000     0.0
36174       0.0          0.0         0.0     0.0   0.000000     0.0
...         ...          ...         ...     ...        ...     ...
99553       0.0          0.0         0.0     0.0   0.000000     0.0
99554       0.0          0.0         0.0     0.0   0.000000     0.0
99555       0.0          0.0         0.0     0.0   0.666667     0.0
99556       0.0          0.0         0.0     0.0   0.000000     0.0
99557       0.0          0.0         0.0     0.0   0.000000     0.0

[3118 rows x 6 columns]


In [6]:
# Build the binary target `nv`: 1 when the row-wise sum of the six quantity
# columns is positive, 0 otherwise.
item_cols = ['eggsno_q', 'fishprawn_q', 'goatmeat_q', 'beef_q', 'chicken_q', 'pork_q']
row_total = subset_df[item_cols].sum(axis=1)
has_any = row_total > 0
subset_df['nv'] = np.where(has_any, 1, 0)

# Quick sanity check: first rows, then summary statistics of the new column.
print(subset_df.head())
nv_summary = subset_df['nv'].describe()
print(nv_summary)

       eggsno_q  fishprawn_q  goatmeat_q  beef_q  chicken_q  pork_q  nv
36170       0.0          0.0         0.0     0.0        0.0     0.0   0
36171       0.0          0.0         0.0     0.0        0.0     0.0   0
36172       0.0          0.0         0.0     0.0        0.0     0.0   0
36173       0.0          0.0         0.0     0.0        0.0     0.0   0
36174       0.0          0.0         0.0     0.0        0.0     0.0   0
count    3118.000000
mean        0.238294
std         0.426108
min         0.000000
25%         0.000000
50%         0.000000
75%         0.000000
max         1.000000
Name: nv, dtype: float64


In [7]:
# Fit a probit regression of nv on the six quantity columns and print the
# fitted-model summary.
# NOTE(review): nv is constructed from exactly these six columns (it is 1
# whenever their row sum is positive), so the regressors determine the outcome.
# The recorded run reports `converged: False` with very large coefficients,
# which is consistent with (quasi-)perfect separation. The estimates should
# not be interpreted; predictors that are not part of nv's definition are
# needed for a meaningful model - TODO choose them from the full dataset.
probit_formula = 'nv ~ eggsno_q + fishprawn_q + goatmeat_q + beef_q + chicken_q + pork_q'
probit_spec = smf.probit(probit_formula, data=subset_df)
probit_model = probit_spec.fit()
print(probit_model.summary())

         Current function value: 0.003330
         Iterations: 35
                          Probit Regression Results                           
Dep. Variable:                     nv   No. Observations:                 3118
Model:                         Probit   Df Residuals:                     3111
Method:                           MLE   Df Model:                            6
Date:                Mon, 01 Jul 2024   Pseudo R-squ.:                  0.9939
Time:                        21:16:51   Log-Likelihood:                -10.382
converged:                      False   LL-Null:                       -1712.1
Covariance Type:            nonrobust   LLR p-value:                     0.000
                  coef    std err          z      P>|z|      [0.025      0.975]
-------------------------------------------------------------------------------
Intercept      -3.1172      0.205    -15.184      0.000      -3.520      -2.715
eggsno_q     1.156e+05   1.66e+04      6.950      0.000     8.



In [8]:
# Marginal effects of the fitted probit model.
# at='overall' and method='dydx' are the library defaults, spelled out here so
# the reader can see which kind of effect is reported without looking it up.
marginal_effects = probit_model.get_margeff(at='overall', method='dydx')
margeff_table = marginal_effects.summary()
print(margeff_table)

       Probit Marginal Effects       
Dep. Variable:                     nv
Method:                          dydx
At:                           overall
                 dy/dx    std err          z      P>|z|      [0.025      0.975]
-------------------------------------------------------------------------------
eggsno_q      359.4677    160.288      2.243      0.025      45.308     673.627
fishprawn_q     0.2891    277.370      0.001      0.999    -543.346     543.924
goatmeat_q      0.5023     20.768      0.024      0.981     -40.203      41.207
beef_q          0.1790   3.66e+11    4.9e-13      1.000   -7.16e+11    7.16e+11
chicken_q       0.4137     29.677      0.014      0.989     -57.752      58.579
pork_q          0.0777      5.833      0.013      0.989     -11.354      11.510
