In [None]:
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Location of the EPMA workbook and the worksheet to analyse.
file_path = '/Users/tomassalazar/Desktop/Work/Krawcyzski Group/ESPM.Code/Petrology/Kamchatka/EPMA S.2 Analyses/EPMA RAW DATA.2/Tomas Salazar 7-22-2024 samples.xlsx'
sheet_name = '1704'

# Load the worksheet. On failure, report the error and re-raise so execution
# stops here instead of continuing without a DataFrame.
try:
    df = pd.read_excel(file_path, sheet_name=sheet_name)
except Exception as e:
    print(f"Error reading the Excel file: {e}")
    raise

# Strip surrounding whitespace from the headers so the column lookups below
# match names such as 'Cl ' or ' FeO'.
df.columns = df.columns.str.strip()

# Features
# NOTE(review): PCA rejects missing values; this assumes the selected columns
# contain no NaN -- confirm against the worksheet.
X = df[['Cl', 'F', 'SiO2', 'MgO', 'CaO']]  # Example feature columns

# Define the pre-calculated values (placeholders for actual values).
# These should be assigned based on the actual results from previous cells.
# NOTE: while Amph_IronRatio is a constant placeholder its variance is zero,
# so regression metrics computed against it are not meaningful until the
# real values are substituted.
Amph_IronRatio = [0.5] * len(df)  # Placeholder: replace with actual values
fO2 = [10] * len(df)  # Placeholder: replace with actual values

# Add these values to the DataFrame if needed for further analysis
df['Amph_IronRatio'] = Amph_IronRatio
df['fO2'] = fO2

# Calculate weight percent oxides (assuming these are already in the correct units)
df['FeO_wt_percent'] = df['FeO']  # Example: Directly from the data

# Targets
y_Amph_IronRatio = df['Amph_IronRatio']
y_FeO_wt_percent = df['FeO_wt_percent']

# Split data into training and testing sets.
# Both targets go through a single call so that X and every target share one
# row partition by construction, rather than relying on two separate calls
# using the same random_state (and overwriting X_train / X_test).
(
    X_train,
    X_test,
    y_train_Amph_IronRatio,
    y_test_Amph_IronRatio,
    y_train_FeO_wt_percent,
    y_test_FeO_wt_percent,
) = train_test_split(
    X,
    y_Amph_IronRatio,
    y_FeO_wt_percent,
    test_size=0.2,
    random_state=42,
)

# Create a PCR pipeline
def pcr_pipeline(n_components):
    """Build an unfitted principal component regression (PCR) pipeline.

    The steps run in order: standardize the features, project them onto
    principal components, then fit a linear regression on the component
    scores.

    Args:
        n_components: Passed straight through to ``PCA(n_components=...)``.

    Returns:
        A new ``Pipeline`` whose steps are named ``'scaler'``, ``'pca'``
        and ``'regression'``.
    """
    steps = [
        ('scaler', StandardScaler()),
        ('pca', PCA(n_components=n_components)),
        ('regression', LinearRegression()),
    ]
    return Pipeline(steps)

# ---- Amphibole iron ratio -------------------------------------------------
# Build the pipeline (adjust n_components as needed), fit it on the training
# rows, and predict the held-out rows.
pcr_Amph_IronRatio = pcr_pipeline(n_components=2)
y_pred_Amph_IronRatio = pcr_Amph_IronRatio.fit(
    X_train, y_train_Amph_IronRatio
).predict(X_test)

# Score the held-out predictions and report them to three decimals.
mse_Amph_IronRatio, r2_Amph_IronRatio = (
    mean_squared_error(y_test_Amph_IronRatio, y_pred_Amph_IronRatio),
    r2_score(y_test_Amph_IronRatio, y_pred_Amph_IronRatio),
)
print(f'Amphibole Iron Ratio PCR MSE: {mse_Amph_IronRatio:.3f}')
print(f'Amphibole Iron Ratio PCR R^2: {r2_Amph_IronRatio:.3f}')

# ---- FeO weight percent ---------------------------------------------------
# Same procedure with a separate pipeline instance for the FeO target
# (adjust n_components as needed).
pcr_FeO_wt_percent = pcr_pipeline(n_components=2)
y_pred_FeO_wt_percent = pcr_FeO_wt_percent.fit(
    X_train, y_train_FeO_wt_percent
).predict(X_test)

# Score the held-out predictions and report them to three decimals.
mse_FeO_wt_percent, r2_FeO_wt_percent = (
    mean_squared_error(y_test_FeO_wt_percent, y_pred_FeO_wt_percent),
    r2_score(y_test_FeO_wt_percent, y_pred_FeO_wt_percent),
)
print(f'FeO Weight Percent PCR MSE: {mse_FeO_wt_percent:.3f}')
print(f'FeO Weight Percent PCR R^2: {r2_FeO_wt_percent:.3f}')
