In [None]:
import pandas as pd
import numpy as np

# Read the 2002 incident extract into a DataFrame.
df = pd.read_csv("/content/2002.csv")

# Pull the three code columns out as NumPy arrays of strings.
# Casting to str lets every value be compared against the string code
# lists below; a missing value is rendered as the text 'nan' and so
# never matches any listed code.
area_orig, cause_ign, fact_ign = (
    df[column].astype(str).to_numpy()
    for column in ('AREA_ORIG', 'CAUSE_IGN', 'FACT_IGN_1')
)

# Codes the analysis treats as solar/natural-related, one list per field.
area_orig_codes = [
    '14', '20', '21', '22', '23', '24', '25', '26', '27', '28',
    '52', '55', '72', '73', '74', '75', '76', '77', 'UU',
]
cause_ign_codes = ['2', 'U']
fact_ign_codes = [
    '0', '21', '22', '30', '31', '32', '33', '34', '35', '36', '37',
    '40', '41', '42', '43', '44', '52', '53', '54', '56', '58', '60', 'UU',
]

# One boolean mask per field: True where the row's code is in the list.
mask_area = np.isin(area_orig, area_orig_codes)
mask_cause = np.isin(cause_ign, cause_ign_codes)
mask_fact_ign = np.isin(fact_ign, fact_ign_codes)

# A row is kept only when all three fields match (element-wise AND).
combined_mask = np.logical_and.reduce((mask_area, mask_cause, mask_fact_ign))

# Select the matching rows.
filtered_df = df.loc[combined_mask]

# Show the selection ordered by the 'HEAT_SOURC' column
# (chosen simply as a column that exists in the frame).
print(filtered_df.sort_values('HEAT_SOURC'))

       STATE AREA_ORIG HEAT_SOURC FIRST_IGN TYPE_MAT CAUSE_IGN FACT_IGN_1
249544    NC        24          0        26        0         2         52
157840    LA        14          0        UU       UU         2         53
157661    LA        24          0        UU      NaN         2         UU
325039    OK        14          0        UU       UU         2         UU
325094    OK        76          0        70       UU         2          0
...      ...       ...        ...       ...      ...       ...        ...
465420    WV        UU        NaN       NaN      NaN         U         UU
465461    WV        UU        NaN        UU       UU         U         UU
466116    WV        UU        NaN       NaN      NaN         U         UU
466320    WV        UU        NaN        72       41         U         UU
466373    WV        UU        NaN        UU      NaN         2         53

[47697 rows x 7 columns]


In [None]:
from scipy.stats import spearmanr

# Read the yearly table used for the correlation analysis.
data = pd.read_csv(filepath_or_buffer="/content/Data.csv")

# Preview of the rows that have no missing values. dropna() returns a new
# frame and the result is only displayed, not assigned, so `data` itself
# still contains any rows with NaN after this cell runs.
data.dropna(axis=0, how="any")

Unnamed: 0,Year,Number of Targeted Fire Incidents,Sunspot
0,1986,67651.0,14.8
1,1987,66278.0,33.9
2,1988,71834.0,123.0
3,1989,69762.0,211.1
4,1990,67252.0,191.8
5,1991,68807.0,203.3
7,1993,71276.0,76.1
8,1994,69701.0,44.9
9,1995,67224.0,25.1
10,1996,72222.0,11.6


In [None]:
# Show the column labels of `data` to confirm the names used for indexing.
data.columns

Index(['Year', 'Number of Targeted Fire Incidents', 'Sunspot'], dtype='object')

In [None]:
from scipy.stats import spearmanr

# The two yearly series to correlate.
x = data["Number of Targeted Fire Incidents"]
y = data["Sunspot"]

# A year is usable only when neither series is missing for it.
# `data` is not modified; the mask is applied at the call below.
valid = ~(x.isna() | y.isna())

# Spearman rank correlation over the paired, non-missing observations.
corr, p_value = spearmanr(x.loc[valid], y.loc[valid])

print("Spearman correlation:", corr)
print("P-value:", p_value)

Spearman correlation: -0.15979672501411638
P-value: 0.4774887142899461
