In [6]:
import pandas as pd
from scipy.stats import chi2_contingency, f_oneway

# --- Load and prepare the dataset ------------------------------------------
file_path = 'C:/Users/user/Desktop/진짜임 이게 찐.csv'  # e.g. '/mnt/data/진짜임 이게 찐.csv'
data = pd.read_csv(file_path, encoding='cp949')  # the encoding may need to change for other files

# Soil attributes that are analysed as categorical factors.
categorical_columns = ['토양깊이유형', '토성코드', '토양형코드', '토양유효수분량']
data = data.astype({name: 'object' for name in categorical_columns})

# Production columns: anything that cannot be parsed as a number becomes 0.
production_columns = ['밤 (kg)', '복분자딸기 (kg)', '오갈피 (kg)', '마 (kg)', '도라지 (kg)', '더덕 (kg)', '생표고 (kg)']
data = data.assign(**{
    name: pd.to_numeric(data[name], errors='coerce').fillna(0)
    for name in production_columns
})

# --- Run the tests ----------------------------------------------------------
# One p-value table per production column, keyed by the production column name.
chi2_results_all = {}
anova_results_all = {}

for prod_col in production_columns:
    chi2_results = {}
    anova_results = {}

    for cat_col in categorical_columns:
        # Chi-square test on the (category x production value) contingency table.
        contingency = pd.crosstab(data[cat_col], data[prod_col])
        chi2_results[cat_col] = {'p-value': chi2_contingency(contingency)[1]}

        # One-way ANOVA: one sample of production values per category level.
        samples = [group[prod_col].values for name, group in data.groupby(cat_col)]
        anova_results[cat_col] = {'p-value': f_oneway(*samples)[1]}

    chi2_results_all[prod_col] = pd.DataFrame.from_dict(chi2_results, orient='index', columns=['p-value'])
    anova_results_all[prod_col] = pd.DataFrame.from_dict(anova_results, orient='index', columns=['p-value'])

# --- Report every result ----------------------------------------------------
for header, tables in (
    ("All Chi-Square Test Results:", chi2_results_all),
    ("\nAll ANOVA Test Results:", anova_results_all),
):
    print(header)
    for prod_col, result in tables.items():
        print(f"\n{prod_col}:\n", result)

# --- Keep only rows with p-value < 0.05 -------------------------------------
chi2_significant = {}
for prod_col, result in chi2_results_all.items():
    chi2_significant[prod_col] = result.loc[result['p-value'] < 0.05]

anova_significant = {}
for prod_col, result in anova_results_all.items():
    anova_significant[prod_col] = result.loc[result['p-value'] < 0.05]

# --- Report the significant results, skipping crops with none ---------------
for header, tables in (
    ("\nSignificant Chi-Square Test Results (p-value < 0.05):", chi2_significant),
    ("\nSignificant ANOVA Test Results (p-value < 0.05):", anova_significant),
):
    print(header)
    for prod_col, result in tables.items():
        if result.empty:
            continue
        print(f"\n{prod_col}:\n", result)


All Chi-Square Test Results:

밤 (kg):
           p-value
토양깊이유형   0.224119
토성코드     0.561410
토양형코드    1.000000
토양유효수분량  0.773393

복분자딸기 (kg):
           p-value
토양깊이유형   0.003137
토성코드     0.933116
토양형코드    0.008664
토양유효수분량  0.015607

오갈피 (kg):
           p-value
토양깊이유형   0.999166
토성코드     0.740545
토양형코드    0.000026
토양유효수분량  0.929743

마 (kg):
           p-value
토양깊이유형   1.000000
토성코드     0.999997
토양형코드    1.000000
토양유효수분량  0.810104

도라지 (kg):
           p-value
토양깊이유형   0.711105
토성코드     0.736817
토양형코드    0.665482
토양유효수분량  0.837462

더덕 (kg):
           p-value
토양깊이유형   0.999999
토성코드     0.455138
토양형코드    0.001237
토양유효수분량  0.337869

생표고 (kg):
               p-value
토양깊이유형   1.000000e+00
토성코드     3.169918e-01
토양형코드    2.564120e-03
토양유효수분량  1.355207e-07

All ANOVA Test Results:

밤 (kg):
           p-value
토양깊이유형   0.880416
토성코드     0.156010
토양형코드    0.543478
토양유효수분량  0.450064

복분자딸기 (kg):
               p-value
토양깊이유형   9.666846e-01
토성코드     6.528709e-01
토양형코드    2.588276e-01
토양유효수분량  4.40

In [9]:
import pandas as pd
from scipy.stats import spearmanr

def calculate_spearman_correlation(data, production_col, categorical_cols):
    """
    Calculate Spearman correlations between categorical columns and one production column.

    Rows whose production value equals 0 are excluded first. Each categorical
    column is then integer-encoded with ``pd.factorize`` and correlated against
    the remaining production values. The input DataFrame is not modified.

    Parameters:
        data (DataFrame): The input dataset.
        production_col (str): The production column to analyze.
        categorical_cols (list): The list of categorical columns to analyze.

    Returns:
        spearman_results_df (DataFrame): One row per categorical column with the
            columns 'Spearman Correlation' and 'p-value'. Both values are NaN
            when fewer than two rows remain after the zero-production filter.
    """
    # Filter the data to exclude rows where production is 0
    filtered_data = data[data[production_col] != 0]
    production_values = filtered_data[production_col]

    # A rank correlation is undefined for fewer than two observations.
    has_enough_rows = len(filtered_data) >= 2
    nan = float('nan')

    spearman_results = {}
    for col in categorical_cols:
        correlation, p_value = nan, nan
        if has_enough_rows:
            # NOTE(review): factorize numbers the levels in order of first
            # appearance, so the coefficient depends on row order unless the
            # column is already ordinal and sorted -- confirm this is intended.
            codes = pd.factorize(filtered_data[col])[0]
            # Run the test once and reuse both outputs.
            outcome = spearmanr(codes, production_values)
            correlation, p_value = outcome[0], outcome[1]
        spearman_results[col] = {
            'Spearman Correlation': correlation,
            'p-value': p_value,
        }

    # Convert results into a DataFrame
    spearman_results_df = pd.DataFrame.from_dict(spearman_results, orient='index')

    return spearman_results_df

# --- Load and prepare the dataset ------------------------------------------
file_path = 'C:/Users/user/Desktop/진짜임 이게 찐.csv'  # e.g. '/mnt/data/진짜임 이게 찐.csv'
data = pd.read_csv(file_path, encoding='cp949')

# Soil attributes that are correlated against each production column.
categorical_columns = ['토양깊이유형', '토성코드', '토양형코드', '토양유효수분량']

# Production columns: anything that cannot be parsed as a number becomes 0.
production_columns = ['밤 (kg)', '복분자딸기 (kg)', '오갈피 (kg)', '마 (kg)', '도라지 (kg)', '더덕 (kg)', '생표고 (kg)']
data = data.assign(**{
    name: pd.to_numeric(data[name], errors='coerce').fillna(0)
    for name in production_columns
})

# --- Correlate every production column and export the tables ---------------
all_spearman_results = {}
significant_spearman_results = {}

for production_col in production_columns:
    print(f"Calculating Spearman correlation for {production_col}...")
    spearman_results = calculate_spearman_correlation(data, production_col, categorical_columns)
    all_spearman_results[production_col] = spearman_results

    # Full table: one CSV per production column.
    output_file_all = f'spearman_results_{production_col.replace(" ", "_")}.csv'
    spearman_results.to_csv(output_file_all, encoding='utf-8-sig')
    print(f"All results saved to {output_file_all}")

    # Rows with p-value < 0.05 are kept separately.
    significant_results = spearman_results.loc[spearman_results['p-value'] < 0.05]
    significant_spearman_results[production_col] = significant_results

    if significant_results.empty:
        print(f"No significant results for {production_col}\n")
        continue

    # A second CSV is written only when something is significant.
    output_file_significant = f'significant_spearman_results_{production_col.replace(" ", "_")}.csv'
    significant_results.to_csv(output_file_significant, encoding='utf-8-sig')
    print(f"Significant results saved to {output_file_significant}\n")

# --- Report -----------------------------------------------------------------
for production_col, results in all_spearman_results.items():
    print(f"\nSpearman Correlation Results for {production_col}:\n", results)

print("\nSignificant Spearman Correlation Results:")
for production_col, results in significant_spearman_results.items():
    if results.empty:
        continue
    print(f"\n{production_col}:\n", results)


Calculating Spearman correlation for 밤 (kg)...
All results saved to spearman_results_밤_(kg).csv
Significant results saved to significant_spearman_results_밤_(kg).csv

Calculating Spearman correlation for 복분자딸기 (kg)...
All results saved to spearman_results_복분자딸기_(kg).csv
No significant results for 복분자딸기 (kg)

Calculating Spearman correlation for 오갈피 (kg)...
All results saved to spearman_results_오갈피_(kg).csv
No significant results for 오갈피 (kg)

Calculating Spearman correlation for 마 (kg)...
All results saved to spearman_results_마_(kg).csv
No significant results for 마 (kg)

Calculating Spearman correlation for 도라지 (kg)...
All results saved to spearman_results_도라지_(kg).csv
No significant results for 도라지 (kg)

Calculating Spearman correlation for 더덕 (kg)...
All results saved to spearman_results_더덕_(kg).csv
No significant results for 더덕 (kg)

Calculating Spearman correlation for 생표고 (kg)...
All results saved to spearman_results_생표고_(kg).csv
No significant results for 생표고 (kg)


Spearman Corre

In [11]:
import pandas as pd
from scipy.stats import chi2_contingency, f_oneway

# --- Load and prepare the dataset ------------------------------------------
file_path = 'C:/Users/user/Desktop/진짜임 이게 찐.csv'  # e.g. '/mnt/data/진짜임 이게 찐.csv'
data = pd.read_csv(file_path, encoding='cp949')  # the encoding may need to change for other files

# Soil attributes that are analysed as categorical factors.
categorical_columns = ['토양깊이유형', '토성코드', '토양형코드', '토양유효수분량']
data = data.astype({name: 'object' for name in categorical_columns})

# Production columns: anything that cannot be parsed as a number becomes 0.
production_columns = ['밤 (kg)', '복분자딸기 (kg)', '오갈피 (kg)', '마 (kg)', '도라지 (kg)', '더덕 (kg)', '생표고 (kg)']
data = data.assign(**{
    name: pd.to_numeric(data[name], errors='coerce').fillna(0)
    for name in production_columns
})

# Function to perform Chi-Square and ANOVA analysis
def perform_analysis_for_production(data, production_col, categorical_cols):
    """
    Perform Chi-Square and ANOVA analysis for a given production column, excluding rows where production is 0.

    For every categorical column, the Chi-Square test is run on the
    contingency table of (category level x production value), and the one-way
    ANOVA compares the production values grouped by category level. The input
    DataFrame is not modified.

    A p-value is reported as NaN instead of raising when the test cannot be
    computed: the Chi-Square test when the contingency table is empty, and the
    ANOVA when fewer than two category levels remain after filtering.

    Parameters:
        data (DataFrame): The input dataset.
        production_col (str): The production column to analyze.
        categorical_cols (list): The list of categorical columns to analyze.

    Returns:
        chi2_results_df (DataFrame): DataFrame of Chi-Square p-values.
        anova_results_df (DataFrame): DataFrame of ANOVA p-values.
    """
    # Filter the data to exclude rows where production is 0
    filtered_data = data[data[production_col] != 0]
    nan = float('nan')

    chi2_results = {}
    anova_results = {}
    for cat_col in categorical_cols:
        chi2_p = nan
        anova_p = nan

        if not filtered_data.empty:
            # chi2_contingency raises on a size-0 table, so only test real tables.
            table = pd.crosstab(filtered_data[cat_col], filtered_data[production_col])
            if table.size:
                chi2_p = chi2_contingency(table)[1]

            # f_oneway needs at least two samples (one per category level).
            samples = [
                values.values
                for _, values in filtered_data.groupby(cat_col)[production_col]
            ]
            if len(samples) >= 2:
                anova_p = f_oneway(*samples)[1]

        chi2_results[cat_col] = {'p-value': chi2_p}
        anova_results[cat_col] = {'p-value': anova_p}

    # Convert results into DataFrames
    chi2_results_df = pd.DataFrame.from_dict(chi2_results, orient='index', columns=['p-value'])
    anova_results_df = pd.DataFrame.from_dict(anova_results, orient='index', columns=['p-value'])

    return chi2_results_df, anova_results_df

# Example usage: analyse a single crop.
# Swap '복분자딸기 (kg)' for whichever production column should be analysed.
chi2_results, anova_results = perform_analysis_for_production(data, '복분자딸기 (kg)', categorical_columns)

# Persist both p-value tables as CSV files.
for table, csv_name in (
    (chi2_results, 'chi2_results.csv'),
    (anova_results, 'anova_results.csv'),
):
    table.to_csv(csv_name, encoding='utf-8-sig')

# Also show the tables directly.
print("Chi-Square Test Results:", chi2_results, sep="\n")
print("\nANOVA Test Results:", anova_results, sep="\n")


Chi-Square Test Results:
          p-value
토양깊이유형   0.425951
토성코드     0.552794
토양형코드    0.984651
토양유효수분량  0.520064

ANOVA Test Results:
          p-value
토양깊이유형   0.973020
토성코드     0.740754
토양형코드    0.554722
토양유효수분량  0.000004


In [13]:
import pandas as pd
from scipy.stats import chi2_contingency, f_oneway

# --- Load and prepare the dataset ------------------------------------------
file_path = 'C:/Users/user/Desktop/진짜임 이게 찐.csv'  # enter the path of the CSV file
data = pd.read_csv(file_path, encoding='cp949')

# Soil attributes that are analysed as categorical factors.
categorical_columns = ['토양깊이유형', '토성코드', '토양형코드', '토양유효수분량']
data = data.astype({name: 'object' for name in categorical_columns})

# Production columns: anything that cannot be parsed as a number becomes 0.
production_columns = ['밤 (kg)', '복분자딸기 (kg)', '오갈피 (kg)', '마 (kg)', '도라지 (kg)', '더덕 (kg)', '생표고 (kg)']
data = data.assign(**{
    name: pd.to_numeric(data[name], errors='coerce').fillna(0)
    for name in production_columns
})

# Function to perform Chi-Square and ANOVA analysis
def perform_analysis_for_production(data, production_col, categorical_cols):
    """
    Perform Chi-Square and ANOVA analysis for a given production column, excluding rows where production is 0.

    For every categorical column, the Chi-Square test is run on the
    contingency table of (category level x production value), and the one-way
    ANOVA compares the production values grouped by category level. The input
    DataFrame is not modified.

    A p-value is reported as NaN instead of raising when the test cannot be
    computed: the Chi-Square test when the contingency table is empty, and the
    ANOVA when fewer than two category levels remain after filtering.

    Parameters:
        data (DataFrame): The input dataset.
        production_col (str): The production column to analyze.
        categorical_cols (list): The list of categorical columns to analyze.

    Returns:
        chi2_results_df (DataFrame): DataFrame of Chi-Square p-values.
        anova_results_df (DataFrame): DataFrame of ANOVA p-values.
    """
    # Filter the data to exclude rows where production is 0
    filtered_data = data[data[production_col] != 0]
    nan = float('nan')

    chi2_results = {}
    anova_results = {}
    for cat_col in categorical_cols:
        chi2_p = nan
        anova_p = nan

        if not filtered_data.empty:
            # chi2_contingency raises on a size-0 table, so only test real tables.
            table = pd.crosstab(filtered_data[cat_col], filtered_data[production_col])
            if table.size:
                chi2_p = chi2_contingency(table)[1]

            # f_oneway needs at least two samples (one per category level).
            samples = [
                values.values
                for _, values in filtered_data.groupby(cat_col)[production_col]
            ]
            if len(samples) >= 2:
                anova_p = f_oneway(*samples)[1]

        chi2_results[cat_col] = {'p-value': chi2_p}
        anova_results[cat_col] = {'p-value': anova_p}

    # Convert results into DataFrames
    chi2_results_df = pd.DataFrame.from_dict(chi2_results, orient='index', columns=['p-value'])
    anova_results_df = pd.DataFrame.from_dict(anova_results, orient='index', columns=['p-value'])

    return chi2_results_df, anova_results_df

# Run the Chi-Square / ANOVA analysis for every production column and collect
# the crops that have at least one significant (p-value < 0.05) result.
significant_results_cleaned = {}

for production_col in production_columns:
    print(f"Performing analysis for {production_col}...")

    chi2_results, anova_results = perform_analysis_for_production(data, production_col, categorical_columns)

    # Rows below the 0.05 significance threshold.
    chi2_significant = chi2_results.loc[chi2_results['p-value'] < 0.05]
    anova_significant = anova_results.loc[anova_results['p-value'] < 0.05]

    # Record the crop unless both significant tables are empty.
    if not (chi2_significant.empty and anova_significant.empty):
        significant_results_cleaned[production_col] = {
            'Chi-Square': chi2_significant,
            'ANOVA': anova_significant
        }

    # Full tables for this crop.
    print(f"\nChi-Square Test Results for {production_col}:", chi2_results, sep="\n")
    print(f"\nANOVA Test Results for {production_col}:", anova_results, sep="\n")

# Summary restricted to the significant findings.
print("\nSignificant Results (p-value < 0.05):")
for production_col, results in significant_results_cleaned.items():
    print(f"\n{production_col}:")
    print("Chi-Square Significant Results:", results['Chi-Square'], sep="\n")
    print("\nANOVA Significant Results:", results['ANOVA'], sep="\n")


Performing analysis for 밤 (kg)...

Chi-Square Test Results for 밤 (kg):
          p-value
토양깊이유형   0.453127
토성코드     0.453127
토양형코드    0.435978
토양유효수분량  0.448365

ANOVA Test Results for 밤 (kg):
          p-value
토양깊이유형   0.878330
토성코드     0.170395
토양형코드    0.351903
토양유효수분량  0.532277
Performing analysis for 복분자딸기 (kg)...

Chi-Square Test Results for 복분자딸기 (kg):
          p-value
토양깊이유형   0.425951
토성코드     0.552794
토양형코드    0.984651
토양유효수분량  0.520064

ANOVA Test Results for 복분자딸기 (kg):
          p-value
토양깊이유형   0.973020
토성코드     0.740754
토양형코드    0.554722
토양유효수분량  0.000004
Performing analysis for 오갈피 (kg)...

Chi-Square Test Results for 오갈피 (kg):
          p-value
토양깊이유형   0.962914
토성코드     0.376942
토양형코드    0.121788
토양유효수분량  0.929333

ANOVA Test Results for 오갈피 (kg):
          p-value
토양깊이유형   0.811761
토성코드     0.436857
토양형코드    0.953100
토양유효수분량  0.096380
Performing analysis for 마 (kg)...

Chi-Square Test Results for 마 (kg):
          p-value
토양깊이유형   0.394916
토성코드     0.436744
토양형코드   