In [1]:
import numpy as np
import pandas as pd
from scipy import stats
import matplotlib.pylab as plt

In [2]:
def load_data_Urbun(sheet):
    df_urban = {}
    years = ['98', '99', '1400', '1401']
    for year in years:
        file_name = f'U{year}.xlsx'
        sheet_name = f'U{year}{sheet}'
        df = pd.read_excel(file_name, sheet_name = sheet_name)
        df_urban[f'U{year}'] = df
    return df_urban

In [3]:
def load_data_Rural(sheet):
    df_Rural = {}
    years = ['98', '99', '1400', '1401']
    for year in years:
        file_name = f'R{year}.xlsx'
        sheet_name = f'R{year}{sheet}'
        df = pd.read_excel(file_name, sheet_name = sheet_name)
        df_Rural[f'R{year}'] = df
    return df_Rural

In [4]:
Rural_Data = load_data_Rural('Data')
Urbun_Data = load_data_Urbun('Data')

In [5]:
Rural_data_income_paid_jobs = load_data_Rural('P4S01')
Urbun_data_income_paid_jobs = load_data_Urbun('P4S01')

In [6]:
Rural_data_income_freelance_jobs = load_data_Rural('P4S02')
Urbun_data_income_freelance_jobs = load_data_Urbun('P4S02')

In [7]:
Rural_data_income_Miscellaneous = load_data_Rural('P4S03')
Urbun_data_income_Miscellaneous = load_data_Urbun('P4S03')

In [8]:
Rural_data_income_subsidy = load_data_Rural('P4S04')
Urbun_data_income_subsidy = load_data_Urbun('P4S04')

In [9]:
Selected_address = {}
for key, value in Rural_Data.items():
    Selected_address[key] = value[value['province'] == 'CharmahalBakhtiari']['Address'].unique()

for key, value in Urbun_Data.items():
    Selected_address[key] = value[value['province'] == 'CharmahalBakhtiari']['Address'].unique()

In [10]:
# Paid jobs
for key, value in Rural_data_income_paid_jobs.items():
    Rural_data_income_paid_jobs[key] = value[value['Address'].isin(Selected_address[key])]
for key, value in Urbun_data_income_paid_jobs.items():
    Urbun_data_income_paid_jobs[key] = value[value['Address'].isin(Selected_address[key])]

# Freelance jobs
for key, value in Rural_data_income_freelance_jobs.items():
    Rural_data_income_freelance_jobs[key] = value[value['Address'].isin(Selected_address[key])]
for key, value in Urbun_data_income_freelance_jobs.items():
    Urbun_data_income_freelance_jobs[key] = value[value['Address'].isin(Selected_address[key])]

# Miscellaneous
for key, value in Rural_data_income_Miscellaneous.items():
    Rural_data_income_Miscellaneous[key] = value[value['Address'].isin(Selected_address[key])]
for key, value in Urbun_data_income_Miscellaneous.items():
    Urbun_data_income_Miscellaneous[key] = value[value['Address'].isin(Selected_address[key])]

# Subsidy
for key, value in Rural_data_income_subsidy.items():
    Rural_data_income_subsidy[key] = value[value['Address'].isin(Selected_address[key])]
for key, value in Urbun_data_income_subsidy.items():
    Urbun_data_income_subsidy[key] = value[value['Address'].isin(Selected_address[key])]

In [11]:
Urbun_data_income_freelance_jobs['U1401']['income_s_y'] = Urbun_data_income_freelance_jobs['U1401']['income_s_y'].astype('float')

In [12]:
Rural_data_income_freelance_jobs['R1401'].loc[1571, 'income_s_y'] = '0'

In [13]:
for key, value in Urbun_data_income_Miscellaneous.items() :
    for col, value_d in value.dtypes.items() :
        if value_d == 'object' :
            Urbun_data_income_Miscellaneous[key][col] = value[col].fillna(0).astype('float')

for key, value in Rural_data_income_Miscellaneous.items() :
    for col, value_d in value.dtypes.items() :
        if value_d == 'object' :
            Rural_data_income_Miscellaneous[key][col] = value[col].fillna(0).astype('float')

In [14]:
for key, value in Urbun_data_income_subsidy.items() :
    for col, value_d in value.dtypes.items() :
        if value_d == 'object' :
            Urbun_data_income_subsidy[key][col] = value[col].fillna(0).astype('float')

for key, value in Rural_data_income_subsidy.items() :
    for col, value_d in value.dtypes.items() :
        if value_d == 'object' :
            Rural_data_income_subsidy[key][col] = value[col].fillna(0).astype('float')

#### Group By

In [15]:
# Paid Jobs
paid_jobs = {}
for key, value in Rural_data_income_paid_jobs.items():
    Rural_data_income_paid_jobs[key].loc[:, 'netincome_w_y'] = Rural_data_income_paid_jobs[key]['netincome_w_y'].fillna(0).astype('float')
    paid_jobs[key] = value.groupby('Address')[['netincome_w_y']].agg('sum').reset_index()
for key, value in Urbun_data_income_paid_jobs.items():
    Urbun_data_income_paid_jobs[key].loc[:, 'netincome_w_y'] = Urbun_data_income_paid_jobs[key]['netincome_w_y'].fillna(0).astype('float')
    paid_jobs[key] = value.groupby('Address')[['netincome_w_y']].agg('sum').reset_index()
    
# Freelance Jobs
Freelance_Jobs = {}
for key, value in Rural_data_income_freelance_jobs.items() :
    Rural_data_income_freelance_jobs[key].loc[:, 'income_s_y'] = Rural_data_income_freelance_jobs[key]['income_s_y'].fillna(0).astype('float')
    Freelance_Jobs[key] = value.groupby('Address')[['income_s_y']].agg('sum').reset_index()
for key, value in Urbun_data_income_freelance_jobs.items() :
    Urbun_data_income_freelance_jobs[key].loc[:, 'income_s_y'] = Urbun_data_income_freelance_jobs[key]['income_s_y'].fillna(0).astype('float')
    Freelance_Jobs[key] = value.groupby('Address')[['income_s_y']].agg('sum').reset_index()
    
# Miscellaneous
Miscellaneous = {}
selected_columns = ['income_pension', 'income_rent', 'income_interest', 'income_aid', 'income_resale', 'income_transfer']
for key in Rural_data_income_Miscellaneous.keys():
    Miscellaneous[key] = Rural_data_income_Miscellaneous[key].groupby('Address')[selected_columns].agg('sum').reset_index()
for key in Urbun_data_income_Miscellaneous.keys():
    Miscellaneous[key] = Urbun_data_income_Miscellaneous[key].groupby('Address')[selected_columns].agg('sum').reset_index()
    
# Subsidy 
Subsidy = {}
for key, value in Rural_data_income_subsidy.items():
    Subsidy[key] = value.groupby('Address')[['subsidy']].agg('sum').reset_index()
for key, value in Urbun_data_income_subsidy.items():
    Subsidy[key] = value.groupby('Address')[['subsidy']].agg('sum').reset_index()

In [16]:
def process_income_data_Urbun(years):
    ans = {}
    for year in years :
        
        df = paid_jobs[f'U{year}'].merge(Freelance_Jobs[f'U{year}'], how = 'outer')

        df = df.merge(Miscellaneous[f'U{year}'], how = 'outer')

        df = df.merge(Subsidy[f'U{year}'], how='outer')
        
        ans[f'number{year}'] = df['Address'].nunique()

        df = df.drop(columns='Address')

        df.dropna(how='all', inplace=True)

        df.fillna(0, inplace=True)

        df = df.astype('float')

        df = df.sum(axis=1)
                
        ans[f'{year}'] = df
        
    return ans

In [17]:
def process_income_data_Rural(years):
    ans = {}
    for year in years :
        
        df = paid_jobs[f'R{year}'].merge(Freelance_Jobs[f'R{year}'], how = 'outer')

        df = df.merge(Miscellaneous[f'R{year}'], how = 'outer')

        df = df.merge(Subsidy[f'R{year}'], how='outer')
        
        ans[f'number{year}'] = df['Address'].nunique()

        df = df.drop(columns='Address')

        df.dropna(how='all', inplace=True)

        df.fillna(0, inplace=True)

        df = df.astype('float')

        df = df.sum(axis=1)
                
        ans[f'{year}'] = df
            
    return ans

In [18]:
years = ['98', '99', '1400', '1401']
Urban_data_income = process_income_data_Urbun(years)
Rural_data_income = process_income_data_Rural(years)

  df.fillna(0, inplace=True)


In [19]:
for year in years :
    Urban_data_income[year].dropna(inplace=True)
    Rural_data_income[year].dropna(inplace=True)

In [21]:
X = Rural_data_income['98']

### فرضیات

- **H0**: درآمد خانوارهای شهری و روستایی در استان چهارمحال و بختیاری با هم برابر است
- **H1**: درآمد خانوارهای شهری و روستایی در استان چهارمحال و بختیاری با هم برابر نیست
- **ALPHA** : 0.05


#### 98

In [22]:
shapiro_R98_zscore = stats.shapiro(Rural_data_income['98'])
shapiro_U98_zscore = stats.shapiro(Urban_data_income['98'])
shapiro_R98_zscore, shapiro_U98_zscore

(ShapiroResult(statistic=0.8730337948969712, pvalue=3.10940011587708e-19),
 ShapiroResult(statistic=0.7227596972899459, pvalue=2.114277209279307e-31))

- اینجا پی ولیو خیلی کمتر از الفا پس داده ها نرمال نیستن

In [23]:
stats.mannwhitneyu(Rural_data_income['98'], Urban_data_income['98'])

MannwhitneyuResult(statistic=108213.0, pvalue=2.651191958137599e-17)

- اینجا تو آزمون "یو" چون "پی ولیو" خیلی کمتر از آلقا پس فرض صفر رد میشه و فرض متقابلش برقراه

- تلاش میکنیم داده ها رو نرمال کنیم اگه نرمال شدن نتیجه تست "تی" رو با تست "یو" مقایشه میکنیم ببینیم که آیا واقعا فرض صفر رد میشه یا نه

In [24]:
R_98_yeojohnson , _ = stats.yeojohnson(Rural_data_income['98'])
U_98_yeojohnson , _ = stats.yeojohnson(Urban_data_income['98'])
shapiro_R_98_yeojohnson = stats.shapiro(R_98_yeojohnson)
shapiro_U_98_yeojohnson = stats.shapiro(U_98_yeojohnson)
shapiro_R_98_yeojohnson, shapiro_U_98_yeojohnson

(ShapiroResult(statistic=0.888689671015572, pvalue=5.064260624511144e-18),
 ShapiroResult(statistic=0.7558179702687562, pvalue=7.431497996659441e-30))

In [37]:
R_98_log = np.log(Rural_data_income['98'])
U_98_log = np.log(Urban_data_income['98'])

R_98_log = R_98_log[~np.isinf(R_98_log)]
U_98_log = U_98_log[~np.isinf(U_98_log)]

U_98_log.dropna(inplace= True)
R_98_log.dropna(inplace= True)


shapiro_R_98_log = stats.shapiro(R_98_log)
shapiro_U_98_log = stats.shapiro(U_98_log)

shapiro_R_98_log, shapiro_U_98_log

(ShapiroResult(statistic=0.9558175332181926, pvalue=1.0659530227962505e-10),
 ShapiroResult(statistic=0.9571190219039325, pvalue=8.84229456058444e-13))

In [35]:
R_98_sqrt = np.sqrt(Rural_data_income['98'])
U_98_sqrt = np.sqrt(Urban_data_income['98'])

R_98_sqrt = R_98_sqrt[~np.isinf(R_98_sqrt)]
U_98_sqrt = U_98_sqrt[~np.isinf(U_98_sqrt)]

R_98_sqrt.dropna(inplace= True)
U_98_sqrt.dropna(inplace= True)


shapiro_R_98_sqrt = stats.shapiro(R_98_sqrt)
shapiro_U_98_sqrt = stats.shapiro(U_98_sqrt)

shapiro_R_98_sqrt, shapiro_U_98_sqrt

  result = getattr(ufunc, method)(*inputs, **kwargs)


(ShapiroResult(statistic=0.9791868852150308, pvalue=2.7262577722556606e-06),
 ShapiroResult(statistic=0.9399689003138613, pvalue=1.779260928418208e-15))

- تلاش برای نرمال سازی شکست خورد پس فرض صفر همچنان رد میمونه

#### 99

In [25]:
shapiro_R_99 = stats.shapiro(Rural_data_income['99'])
shapiro_U_99 = stats.shapiro(Urban_data_income['99'])
shapiro_R_99, shapiro_U_99

(ShapiroResult(statistic=0.829119523983219, pvalue=2.4774108583506565e-22),
 ShapiroResult(statistic=0.871013548234989, pvalue=1.6928289209323643e-22))

- اینجا پی ولیو خیلی کمتر از الفا پس داده ها نرمال نیستن

In [26]:
stats.mannwhitneyu(Rural_data_income['99'], Urban_data_income['99'])

MannwhitneyuResult(statistic=118671.0, pvalue=6.934803787250026e-11)

- اینجا تو آزمون "یو" چون "پی ولیو" خیلی کمتر از آلقا پس فرض صفر رد میشه و فرض متقابلش برقراه

- تلاش میکنیم داده ها رو نرمال کنیم اگه نرمال شدن نتیجه تست "تی" رو با تست "یو" مقایشه میکنیم ببینیم که آیا واقعا فرض صفر رد میشه یا نه

In [27]:
R_99_yeojohnson , _ = stats.yeojohnson(Rural_data_income['99'])
U_99_yeojohnson , _ = stats.yeojohnson(Urban_data_income['99'])
shapiro_R_99_yeojohnson = stats.shapiro(R_99_yeojohnson)
shapiro_U_99_yeojohnson = stats.shapiro(U_99_yeojohnson)
shapiro_R_99_yeojohnson, shapiro_U_99_yeojohnson

(ShapiroResult(statistic=0.9885980449150746, pvalue=0.0008116157218315527),
 ShapiroResult(statistic=0.8757329109706921, pvalue=4.091328419084555e-22))

In [32]:
R_99_log = np.log(Rural_data_income['99'])
U_99_log = np.log(Urban_data_income['99'])

R_99_log = R_99_log[~np.isnan(R_99_log)]
U_99_log = U_99_log[~np.isinf(U_99_log)]

R_99_log = R_99_log[~np.isnan(R_99_log)]
U_99_log = U_99_log[~np.isinf(U_99_log)]

U_99_log.dropna(inplace= True)

shapiro_R_99_log = stats.shapiro(R_99_log)
shapiro_U_99_log = stats.shapiro(U_99_log)

shapiro_R_99_log, shapiro_U_99_log

(ShapiroResult(statistic=0.9668133163064825, pvalue=5.457106813509286e-09),
 ShapiroResult(statistic=0.9592474727552384, pvalue=2.8918308307996402e-12))

In [31]:
R_99_sqrt = np.sqrt(Rural_data_income['99'])
U_99_sqrt = np.sqrt(Urban_data_income['99'])

U_99_sqrt = U_99_sqrt[~np.isinf(U_99_sqrt)]

U_99_sqrt = U_99_sqrt[~np.isinf(U_99_sqrt)]

U_99_sqrt.dropna(inplace= True)

shapiro_R_99_sqrt = stats.shapiro(R_99_sqrt)
shapiro_U_99_sqrt = stats.shapiro(U_99_sqrt)

shapiro_R_99_sqrt, shapiro_U_99_sqrt

(ShapiroResult(statistic=0.9728331382016107, pvalue=8.2272761317294e-08),
 ShapiroResult(statistic=0.9744411425807096, pvalue=4.423530372506113e-09))

- تلاش برای نرمال سازی شکست خورد پس فرض صفر همچنان رد میمونه

#### 1400

In [39]:
shapiro_R_1400 = stats.shapiro(Rural_data_income['1400'])
shapiro_U_1400 = stats.shapiro(Urban_data_income['1400'])
shapiro_R_1400, shapiro_U_1400

(ShapiroResult(statistic=0.8104428505131657, pvalue=1.3801033552846836e-23),
 ShapiroResult(statistic=0.7670912196125298, pvalue=2.3730154825564715e-29))

In [41]:
stats.mannwhitneyu(Rural_data_income['1400'], Urban_data_income['1400'])

MannwhitneyuResult(statistic=120943.5, pvalue=1.8454659027508173e-12)

- اینجا تو آزمون "یو" چون "پی ولیو" خیلی کمتر از آلقا پس فرض صفر رد میشه و فرض متقابلش برقراه

- تلاش میکنیم داده ها رو نرمال کنیم اگه نرمال شدن نتیجه تست "تی" رو با تست "یو" مقایشه میکنیم ببینیم که آیا واقعا فرض صفر رد میشه یا نه

In [42]:
R_1400_yeojohnson , _ = stats.yeojohnson(Rural_data_income['1400'])
U_1400_yeojohnson , _ = stats.yeojohnson(Urban_data_income['1400'])
shapiro_R_1400_yeojohnson = stats.shapiro(R_1400_yeojohnson)
shapiro_U_1400_yeojohnson = stats.shapiro(U_1400_yeojohnson)
shapiro_R_1400_yeojohnson, shapiro_U_1400_yeojohnson

(ShapiroResult(statistic=0.9813787343631815, pvalue=6.2772685456658566e-06),
 ShapiroResult(statistic=0.9833264088667423, pvalue=9.046024448765705e-07))

In [43]:
R_1400_log = np.log(Rural_data_income['1400'])
U_1400_log = np.log(Urban_data_income['1400'])
shapiro_R_1400_log = stats.shapiro(R_1400_log)
shapiro_U_1400_log = stats.shapiro(U_1400_log)
shapiro_R_1400_log, shapiro_U_1400_log

(ShapiroResult(statistic=0.9581956861546164, pvalue=1.394149862677069e-10),
 ShapiroResult(statistic=0.9550926801437308, pvalue=3.403321603924779e-13))

In [44]:
R_1400_sqrt = np.sqrt(Rural_data_income['1400'])
U_1400_sqrt = np.sqrt(Urban_data_income['1400'])
shapiro_R_1400_sqrt = stats.shapiro(R_1400_sqrt)
shapiro_U_1400_sqrt = stats.shapiro(U_1400_sqrt)
shapiro_R_1400_sqrt, shapiro_U_1400_sqrt

(ShapiroResult(statistic=0.9631447054829181, pvalue=9.421620455935167e-10),
 ShapiroResult(statistic=0.9582678591392096, pvalue=1.2356045923664703e-12))

- تلاش برای نرمال سازی شکست خورد پس فرض صفر همچنان رد میمونه

#### 1401

In [45]:
shapiro_R_1401 = stats.shapiro(Rural_data_income['1401'])
shapiro_U_1401 = stats.shapiro(Urban_data_income['1401'])
shapiro_R_1401, shapiro_U_1401

(ShapiroResult(statistic=0.8624497765713363, pvalue=2.855842062959131e-20),
 ShapiroResult(statistic=0.8955742359464116, pvalue=1.0267914922423725e-20))

In [46]:
stats.mannwhitneyu(Rural_data_income['1401'], Urban_data_income['1401'])

MannwhitneyuResult(statistic=122594.0, pvalue=1.3230455008288734e-11)

- اینجا تو آزمون "یو" چون "پی ولیو" خیلی کمتر از آلقا پس فرض صفر رد میشه و فرض متقابلش برقراه

- تلاش میکنیم داده ها رو نرمال کنیم اگه نرمال شدن نتیجه تست "تی" رو با تست "یو" مقایشه میکنیم ببینیم که آیا واقعا فرض صفر رد میشه یا نه

In [47]:
R_1401_yeojohnson , _ = stats.yeojohnson(Rural_data_income['1401'])
U_1401_yeojohnson , _ = stats.yeojohnson(Urban_data_income['1401'])
shapiro_R_1401_yeojohnson = stats.shapiro(R_1401_yeojohnson)
shapiro_U_1401_yeojohnson = stats.shapiro(U_1401_yeojohnson)
shapiro_R_1401_yeojohnson, shapiro_U_1401_yeojohnson

(ShapiroResult(statistic=0.9897078174328108, pvalue=0.0017231732480825481),
 ShapiroResult(statistic=0.9925084289817818, pvalue=0.0021912123017287194))

In [49]:
R_1401_log = np.log(Rural_data_income['1401'])
U_1401_log = np.log(Urban_data_income['1401'])
shapiro_R_1401_log = stats.shapiro(R_1401_log)
shapiro_U_1401_log = stats.shapiro(U_1401_log)
shapiro_R_1401_log, shapiro_U_1401_log

(ShapiroResult(statistic=0.961430831730584, pvalue=5.413701309021109e-10),
 ShapiroResult(statistic=0.9377086643535099, pvalue=6.096595885710601e-16))

In [50]:
R_1401_sqrt = np.sqrt(Rural_data_income['1401'])
U_1401_sqrt = np.sqrt(Urban_data_income['1401'])
shapiro_R_1401_sqrt = stats.shapiro(R_1401_sqrt)
shapiro_U_1401_sqrt = stats.shapiro(U_1401_sqrt)
shapiro_R_1401_sqrt, shapiro_U_1401_sqrt

(ShapiroResult(statistic=0.977374987516453, pvalue=7.322164232326638e-07),
 ShapiroResult(statistic=0.9878031647453357, pvalue=2.7290995836788253e-05))

- تلاش برای نرمال سازی شکست خورد پس فرض صفر همچنان رد میمونه

#### Total

In [58]:
def Anderson(result) :
    print(f"Statistic: {result.statistic}")
    print(f"Critical Values: {result.critical_values}")
    print(f"Significance Levels: {result.significance_level}")
    
    for i in range(len(result.critical_values)):
        if result.statistic > result.critical_values[i]:
            print(f"At the {result.significance_level[i]}% significance level, data is NOT normal.")
        else:
            print(f"At the {result.significance_level[i]}% significance level, data is normal.")

In [51]:
df_U = paid_jobs[f'U98'].merge(Freelance_Jobs[f'U98'], how = 'outer')

df_U = df_U.merge(Miscellaneous[f'U98'], how = 'outer')

df_U = df_U.merge(Subsidy[f'U98'], how='outer')

df_U = df_U.merge(paid_jobs[f'U99'], how = 'outer')

df_U = df_U.merge(Freelance_Jobs[f'U99'], how = 'outer')

df_U = df_U.merge(Miscellaneous[f'U99'], how = 'outer')

df_U = df_U.merge(Subsidy[f'U99'], how='outer')

df_U = df_U.merge(paid_jobs[f'U1400'], how = 'outer')

df_U = df_U.merge(Freelance_Jobs[f'U1400'], how = 'outer')

df_U = df_U.merge(Miscellaneous[f'U1400'], how = 'outer')

df_U = df_U.merge(Subsidy[f'U1400'], how='outer')

df_U = df_U.merge(paid_jobs[f'U1401'], how = 'outer')

df_U = df_U.merge(Freelance_Jobs[f'U1401'], how = 'outer')

df_U = df_U.merge(Miscellaneous[f'U1401'], how = 'outer')

df_U = df_U.merge(Subsidy[f'U1401'], how='outer')

In [52]:
Total_Urban = df_U.drop(columns='Address').fillna(0).astype('float').sum(axis = 1)

In [53]:
df_R = paid_jobs[f'R98'].merge(Freelance_Jobs[f'R98'], how = 'outer')

df_R = df_R.merge(Miscellaneous[f'R98'], how = 'outer')

df_R = df_R.merge(Subsidy[f'R98'], how='outer')

df_R = df_R.merge(paid_jobs[f'R99'], how = 'outer')

df_R = df_R.merge(Freelance_Jobs[f'R99'], how = 'outer')

df_R = df_R.merge(Miscellaneous[f'R99'], how = 'outer')

df_R = df_R.merge(Subsidy[f'R99'], how='outer')

df_R = df_R.merge(paid_jobs[f'R1400'], how = 'outer')

df_R = df_R.merge(Freelance_Jobs[f'R1400'], how = 'outer')

df_R = df_R.merge(Miscellaneous[f'R1400'], how = 'outer')

df_R = df_R.merge(Subsidy[f'R1400'], how='outer')

df_R = df_R.merge(paid_jobs[f'R1401'], how = 'outer')

df_R = df_R.merge(Freelance_Jobs[f'R1401'], how = 'outer')

df_R = df_R.merge(Miscellaneous[f'R1401'], how = 'outer')

df_R = df_R.merge(Subsidy[f'R1401'], how='outer')

In [54]:
Total_Rural = df_R.drop(columns='Address').fillna(0).astype('float').sum(axis = 1)

  Total_Rural = df_R.drop(columns='Address').fillna(0).astype('float').sum(axis = 1)


In [59]:
anderson_Total_Rural = stats.anderson(Total_Rural)
anderson_Total_Urban = stats.anderson(Total_Urban)
Anderson(anderson_Total_Rural)
print()
Anderson(anderson_Total_Urban)

Statistic: 424.79685521963256
Critical Values: [0.576 0.655 0.786 0.917 1.091]
Significance Levels: [15.  10.   5.   2.5  1. ]
At the 15.0% significance level, data is NOT normal.
At the 10.0% significance level, data is NOT normal.
At the 5.0% significance level, data is NOT normal.
At the 2.5% significance level, data is NOT normal.
At the 1.0% significance level, data is NOT normal.

Statistic: 474.3031062253176
Critical Values: [0.576 0.656 0.786 0.917 1.091]
Significance Levels: [15.  10.   5.   2.5  1. ]
At the 15.0% significance level, data is NOT normal.
At the 10.0% significance level, data is NOT normal.
At the 5.0% significance level, data is NOT normal.
At the 2.5% significance level, data is NOT normal.
At the 1.0% significance level, data is NOT normal.


In [61]:
stats.mannwhitneyu(Total_Rural, Total_Urban)

MannwhitneyuResult(statistic=13054720.5, pvalue=7.723246404175944e-10)

- اینجا تو آزمون "یو" چون "پی ولیو" خیلی کمتر از آلقا پس فرض صفر رد میشه و فرض متقابلش برقراه

- تلاش میکنیم داده ها رو نرمال کنیم اگه نرمال شدن نتیجه تست "تی" رو با تست "یو" مقایشه میکنیم ببینیم که آیا واقعا فرض صفر رد میشه یا نه

In [67]:
R_Total_yeojohnson , _ = stats.yeojohnson(Total_Rural)
U_Total_yeojohnson , _ = stats.yeojohnson(Total_Urban)
anderson_R_Total_yeojohnson = stats.anderson(R_Total_yeojohnson)
anderson_U_Total_yeojohnson = stats.anderson(U_Total_yeojohnson)
Anderson(anderson_R_Total_yeojohnson)
print()
Anderson(anderson_U_Total_yeojohnson)

Statistic: 383.8079699924583
Critical Values: [0.576 0.655 0.786 0.917 1.091]
Significance Levels: [15.  10.   5.   2.5  1. ]
At the 15.0% significance level, data is NOT normal.
At the 10.0% significance level, data is NOT normal.
At the 5.0% significance level, data is NOT normal.
At the 2.5% significance level, data is NOT normal.
At the 1.0% significance level, data is NOT normal.

Statistic: 435.75995730193426
Critical Values: [0.576 0.656 0.786 0.917 1.091]
Significance Levels: [15.  10.   5.   2.5  1. ]
At the 15.0% significance level, data is NOT normal.
At the 10.0% significance level, data is NOT normal.
At the 5.0% significance level, data is NOT normal.
At the 2.5% significance level, data is NOT normal.
At the 1.0% significance level, data is NOT normal.


In [69]:
R_Total_log = np.log(Total_Rural)
U_Total_log = np.log(Total_Urban)

R_Total_log = R_Total_log[~np.isinf(R_Total_log)]
U_Total_log = U_Total_log[~np.isinf(U_Total_log)]

R_Total_log.dropna(inplace= True)
U_Total_log.dropna(inplace= True)

anderson_R_Total_log = stats.anderson(R_Total_log)
anderson_U_Total_log = stats.anderson(U_Total_log)
Anderson(anderson_R_Total_log)
print()
Anderson(anderson_U_Total_log)

Statistic: 44.099673005283876
Critical Values: [0.576 0.655 0.786 0.917 1.091]
Significance Levels: [15.  10.   5.   2.5  1. ]
At the 15.0% significance level, data is NOT normal.
At the 10.0% significance level, data is NOT normal.
At the 5.0% significance level, data is NOT normal.
At the 2.5% significance level, data is NOT normal.
At the 1.0% significance level, data is NOT normal.

Statistic: 103.88084402521963
Critical Values: [0.576 0.656 0.786 0.917 1.091]
Significance Levels: [15.  10.   5.   2.5  1. ]
At the 15.0% significance level, data is NOT normal.
At the 10.0% significance level, data is NOT normal.
At the 5.0% significance level, data is NOT normal.
At the 2.5% significance level, data is NOT normal.
At the 1.0% significance level, data is NOT normal.


  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)


In [70]:
R_Total_sqrt = np.sqrt(Total_Rural)
U_Total_sqrt = np.sqrt(Total_Urban)

R_Total_sqrt = R_Total_sqrt[~np.isinf(R_Total_sqrt)]
U_Total_sqrt = U_Total_sqrt[~np.isinf(U_Total_sqrt)]

R_Total_sqrt.dropna(inplace= True)
U_Total_sqrt.dropna(inplace= True)

anderson_R_Total_sqrt = stats.anderson(R_Total_sqrt)
anderson_U_Total_sqrt = stats.anderson(U_Total_sqrt)
Anderson(anderson_R_Total_sqrt)
print()
Anderson(anderson_U_Total_sqrt)

Statistic: 140.69218330856893
Critical Values: [0.576 0.655 0.786 0.917 1.091]
Significance Levels: [15.  10.   5.   2.5  1. ]
At the 15.0% significance level, data is NOT normal.
At the 10.0% significance level, data is NOT normal.
At the 5.0% significance level, data is NOT normal.
At the 2.5% significance level, data is NOT normal.
At the 1.0% significance level, data is NOT normal.

Statistic: 176.47888926566702
Critical Values: [0.576 0.656 0.786 0.917 1.091]
Significance Levels: [15.  10.   5.   2.5  1. ]
At the 15.0% significance level, data is NOT normal.
At the 10.0% significance level, data is NOT normal.
At the 5.0% significance level, data is NOT normal.
At the 2.5% significance level, data is NOT normal.
At the 1.0% significance level, data is NOT normal.


  result = getattr(ufunc, method)(*inputs, **kwargs)


- رد میمونه 

با توجه به نتایج میشه با درصد اطمینان بالایی گفت که فرض صفر رد میشه چه در به تفکیک سال و چه در طول 4 سال