In [2]:
import numpy as np 
import pandas as pd 
import os
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

# Import ML models
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
from xgboost import XGBRegressor

In [3]:
import os
import pandas as pd

# Directory containing the text files
data_dir = "LSL_Models"
output_file = "extracted_data.csv"

# List to store the extracted information
results = []

# Loop through all text files in the directory
for file_name in os.listdir(data_dir):
    if file_name.endswith(".txt"):
        file_path = os.path.join(data_dir, file_name)
        
        # Read the file using pandas
        try:
            # Load the file as a DataFrame
            data = pd.read_csv(
                file_path,
                delim_whitespace=True,
                skiprows=1,
                names=["Depth[km]", "Vp[km/s]", "Vs[km/s]", "Rho[g/cm3]", "T[K]", "g[m/s2]", "P[Pa]"]
            )
        except Exception as e:
            print(f"Failed to read {file_name}: {e}")
            continue
        
        # Find the first instance where Vs = 0
        vs_zero_idx = data[data["Vs[km/s]"] == 0].index.min()
        
        if vs_zero_idx is not None and vs_zero_idx > 0:
            # Get data just before Vs becomes 0 and at Vs = 0
            row_before = data.iloc[vs_zero_idx - 1]
            row_zero = data.iloc[vs_zero_idx]
            
            results.append({
                "File": file_name,
                "Depth_before_vs_0": row_before["Depth[km]"],
                "Density_before_vs_0": row_before["Rho[g/cm3]"],
                "Vp_before_vs_0": row_before["Vp[km/s]"],
                "Vs_before_vs_0": row_before["Vs[km/s]"],
                "Depth_at_vs_0": row_zero["Depth[km]"],
                "Density_at_vs_0": row_zero["Rho[g/cm3]"],
                "Vp_at_vs_0": row_zero["Vp[km/s]"]
            })

# Save the results to a CSV file
if results:
    results_df = pd.DataFrame(results)
    results_df.to_csv(output_file, index=False)
    print(f"Data extracted and saved to {output_file}")
else:
    print("No data was extracted. Please check the input files.")


Data extracted and saved to extracted_data.csv


In [4]:
results_df

Unnamed: 0,File,Depth_before_vs_0,Density_before_vs_0,Vp_before_vs_0,Vs_before_vs_0,Depth_at_vs_0,Density_at_vs_0,Vp_at_vs_0
0,Model_1.txt,1554.450,3.9805,9.3722,4.9296,1554.450,4.0961,4.6592
1,Model_10.txt,1539.799,3.9590,9.2925,4.8731,1539.799,4.0540,5.0447
2,Model_100.txt,1555.011,3.9893,9.4078,4.9571,1555.011,4.0387,5.2995
3,Model_1000.txt,1551.778,4.0078,9.2710,4.8575,1551.778,3.9849,4.5471
4,Model_101.txt,1574.848,4.0184,9.3386,4.9027,1574.848,4.0492,5.1100
...,...,...,...,...,...,...,...,...
995,Model_995.txt,1564.855,4.0000,9.3698,4.9233,1564.855,4.0089,5.1142
996,Model_996.txt,1561.233,4.0138,9.3186,4.8914,1561.233,4.0455,4.7678
997,Model_997.txt,1552.456,3.9664,9.3152,4.8855,1552.456,4.0085,5.0235
998,Model_998.txt,1568.984,4.0032,9.3944,4.9408,1568.984,4.0068,5.3234


In [5]:
df=results_df

In [6]:
df['rho']=(df['Density_before_vs_0']+df['Density_at_vs_0'])/2

In [7]:
rad=3389.5 #Radius of Mars
df['R_c']=(df['Depth_before_vs_0']+df['Depth_at_vs_0'])/2

In [8]:
df = df[['File', 'R_c', 'rho', 'Vp_at_vs_0', 'Vp_before_vs_0', 'Vs_before_vs_0']]


In [9]:
df

Unnamed: 0,File,R_c,rho,Vp_at_vs_0,Vp_before_vs_0,Vs_before_vs_0
0,Model_1.txt,1554.450,4.03830,4.6592,9.3722,4.9296
1,Model_10.txt,1539.799,4.00650,5.0447,9.2925,4.8731
2,Model_100.txt,1555.011,4.01400,5.2995,9.4078,4.9571
3,Model_1000.txt,1551.778,3.99635,4.5471,9.2710,4.8575
4,Model_101.txt,1574.848,4.03380,5.1100,9.3386,4.9027
...,...,...,...,...,...,...
995,Model_995.txt,1564.855,4.00445,5.1142,9.3698,4.9233
996,Model_996.txt,1561.233,4.02965,4.7678,9.3186,4.8914
997,Model_997.txt,1552.456,3.98745,5.0235,9.3152,4.8855
998,Model_998.txt,1568.984,4.00500,5.3234,9.3944,4.9408


In [10]:
!pip install xgboost



In [12]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score


In [13]:
# Separate features (X) and target (y)
X = df.drop(columns=['R_c','File'])
y = df['R_c']

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define a dictionary of models
models = {
    "Linear Regression": LinearRegression(),
    "Ridge": Ridge(),
    "Lasso": Lasso(),
    "Decision Tree": DecisionTreeRegressor(),
    "Random Forest": RandomForestRegressor(),
    "Gradient Boosting": GradientBoostingRegressor(),
    "AdaBoost": AdaBoostRegressor(),
    "K-Nearest Neighbors": KNeighborsRegressor(),
    "Support Vector Regressor": SVR(),
    "MLP Regressor": MLPRegressor(max_iter=500),
    "XGBoost": XGBRegressor(),
    "Extra Trees": RandomForestRegressor(),  # Replace with ExtraTreesRegressor if needed
    "Bagging Regressor": RandomForestRegressor()  # Replace with BaggingRegressor if needed
}

# Evaluate each model
results = []
for name, model in models.items():
    pipeline = make_pipeline(StandardScaler(), model)  # Ensures scaled features
    pipeline.fit(X_train, y_train)
    y_pred = pipeline.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    
    # Store results
    results.append({
        "Model": name,
        "MSE": mse,
        "MAE": mae,
        "R2_Score": r2
    })
    results.append({"Model": name, "MSE": mse})

# Convert results to a DataFrame
results_df = pd.DataFrame(results)

# Sort results by MSE
results_df = results_df.sort_values(by="MSE").reset_index(drop=True)

# Save results to a CSV file
results_df.to_csv("model_comparison_results.csv", index=False)

print(results_df)




                       Model            MSE         MAE     R2_Score
0                    XGBoost      29.427524         NaN          NaN
1                    XGBoost      29.427524    3.637983     0.695471
2          Linear Regression      30.358253    4.142325     0.685839
3          Linear Regression      30.358253         NaN          NaN
4                      Ridge      30.629875    4.051677     0.683029
5                      Ridge      30.629875         NaN          NaN
6          Bagging Regressor      34.487136    4.298119     0.643112
7          Bagging Regressor      34.487136         NaN          NaN
8                Extra Trees      34.741701         NaN          NaN
9                Extra Trees      34.741701    4.266010     0.640478
10             Random Forest      36.544542    4.363906     0.621821
11             Random Forest      36.544542         NaN          NaN
12       K-Nearest Neighbors      43.788404    4.833245     0.546858
13       K-Nearest Neighbors      