In [5]:
import pandas as pd
from scipy.stats import pearsonr

In [6]:
# Load and clean data
df = pd.read_csv("financial_data.csv")
df.columns = df.columns.str.strip().str.lower()  # Standardize column names

In [7]:
# Clean 'sales' and 'profit' columns, coercing errors to NaN
df['sales'] = pd.to_numeric(df['sales'].str.replace('[$,]', '', regex=True), errors='coerce')
df['profit'] = pd.to_numeric(df['profit'].str.replace('[$,]', '', regex=True), errors='coerce')

In [8]:
# Handle NaN values (choose one of the following options)
df['sales'].fillna(0, inplace=True)
df['profit'].fillna(0, inplace=True)
# OR drop rows with NaN
# df.dropna(subset=['sales', 'profit'], inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['sales'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['profit'].fillna(0, inplace=True)


In [9]:
# Calculate Pearson correlation and p-value
correlation, p_value = pearsonr(df['sales'], df['profit'])

In [10]:

# Output results
print("Pearson Correlation Coefficient:", correlation)
print("P-value:", p_value)

Pearson Correlation Coefficient: 0.8313678786581095
P-value: 2.955523839770789e-180


In [11]:

# Interpretation
alpha = 0.05  # significance level
if p_value < alpha:
    print("Reject the null hypothesis: Significant correlation.")
else:
    print("Fail to reject the null hypothesis: No significant correlation.")

Reject the null hypothesis: Significant correlation.
