In [None]:
import pathlib

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR



In [None]:
# Load the galaxy table from the CSV file located in the working directory.
gal_csv = pd.read_csv(filepath_or_buffer="sqlgalaxy_table.csv")
# Show the column labels so the available fields can be inspected.
gal_csv.columns

In [None]:
# Drop every row that contains at least one missing (NaN/null) value.
# NOTE: this rebinds `gal_csv`, so the raw table can only be recovered by
# re-running the cell that reads the CSV.
gal_csv = gal_csv.dropna()

# Sanity check: count the remaining missing values per column. This is a
# compact summary instead of printing the whole boolean mask.
print(gal_csv.isnull().sum())

# Row labels that still contain a missing value; expected to be empty after
# the dropna() above.
missing_indices = gal_csv[gal_csv.isnull().any(axis=1)].index
print("\n")
print(missing_indices)
if len(missing_indices) == 0:
    print("Good Data")
else:
    print("Missing Data")


In [None]:
# Remove the columns that are considered not relevant for the analysis and
# keep the remainder in a new frame.
exclude_columns = [
    "specObjID",
    "class",
    "subClass",
    "ra",
    "dec",
    "mjd",
]
gal_csv_exc = gal_csv.drop(labels=exclude_columns, axis="columns")

In [None]:
# Standardise every remaining column with StandardScaler and wrap the result
# in a DataFrame that carries the original column labels.
# NOTE(review): the scaler is fitted on the full table, i.e. before the
# train/test split performed further down — confirm this is intended.
scaler = StandardScaler()
scaled_values = scaler.fit(gal_csv_exc).transform(gal_csv_exc)
scaled_data = pd.DataFrame(data=scaled_values, columns=gal_csv_exc.columns)
scaled_data


In [None]:
# Split the scaled table into the regression target and the feature matrix.
# NOTE(review): only "sfr_tot_p50" is removed from the features; any other
# sfr_tot_* percentile columns present in the table stay in as predictors,
# which may leak the target — confirm this is intended.
features = scaled_data.drop(columns=["sfr_tot_p50"])
target = scaled_data["sfr_tot_p50"]

# Hold out 20% of the rows for testing. A fixed random_state makes the split
# (and every metric and plot derived from it) reproducible across kernel
# restarts.
xtrain, xtest, ytrain, ytest = train_test_split(
    features, target, test_size=0.2, random_state=42
)

In [None]:
# Fit a support-vector regressor with its default settings on the training
# split, then predict the held-out test rows.
model = SVR().fit(xtrain, ytrain)
ypred = model.predict(xtest)
# To persist the fitted model with joblib, uncomment the next line:
# joblib.dump(model, "star_formation_prediction_svr_model.pkl")


In [None]:
# Compare the predictions with the true values of the test split.
# Accuracy check: coefficient of determination and mean squared error.
r_squared = r2_score(ytest, ypred)
mse = mean_squared_error(ytest, ypred)

# Scatter of predicted (x axis) against actual (y axis) values, drawn on the
# current axes; both metrics are reported in the title.
ax = plt.gca()
ax.set_xlabel("SFR predicted in normalised units")
ax.set_ylabel("SFR actual in normalised units")
ax.set_title("SVR results Rsquared acc:{0:.2f}, Meansquare error:{1:.2f}".format(r_squared, mse))
plt.scatter(ypred, ytest, marker=".")

In [None]:
# Use this to load the model at a later stage. The snippet is kept as
# comments (rather than a bare string literal) so that the cell does not echo
# it back as output.
# `new_data` is a placeholder that must be defined before running this —
# presumably with the same feature columns and scaling used for training
# (TODO confirm).
# SECURITY: joblib.load unpickles the file, which can execute arbitrary code;
# only load model files that come from a trusted source.
#
# load_model = joblib.load("star_formation_prediction_svr_model.pkl")
# new_pred = load_model.predict(new_data)

In [None]:
# Columns of the cleaned table that take part in the correlation matrix.
corr_columns = [
    'oh_p2p5', 'oh_p16', 'oh_p50', 'oh_p84', 'oh_p97p5',
    'lgm_tot_p2p5', 'lgm_tot_p16', 'lgm_tot_p50', 'lgm_tot_p84',
    'lgm_tot_p97p5', 'sfr_tot_p2p5', 'sfr_tot_p16', 'sfr_tot_p50',
    'sfr_tot_p84', 'sfr_tot_p97p5', 'sigma_balmer', 'oii_3726_reqw',
    'oii_3726_flux', 'neiii_3869_reqw', 'neiii_3869_flux', 'h_delta_reqw',
    'h_delta_flux', 'h_gamma_reqw', 'h_gamma_flux', 'oiii_4363_reqw',
    'oiii_4363_flux', 'h_beta_reqw', 'h_beta_flux', 'oiii_4959_reqw',
    'oiii_4959_flux', 'oiii_5007_reqw', 'oiii_5007_flux', 'hei_5876_reqw',
    'hei_5876_flux', 'oi_6300_reqw', 'oi_6300_flux', 'nii_6548_reqw',
    'nii_6548_flux', 'h_alpha_reqw', 'h_alpha_flux', 'nii_6584_reqw',
    'nii_6584_flux', 'sii_6717_reqw', 'sii_6717_flux', 'sii_6731_reqw',
    'sii_6731_flux', 'ariii7135_reqw', 'ariii7135_flux',
]

# Pairwise correlation between the selected columns, computed on the
# unscaled table with the pandas default method (Pearson).
corr = gal_csv_exc[corr_columns].corr()


In [None]:
# Draw the correlation matrix as a heatmap on a 10x10-inch figure, labelling
# both axes with the column names of the matrix.
plt.figure(figsize=(10, 10))
tick_labels = corr.columns.values
sns.heatmap(corr, xticklabels=tick_labels, yticklabels=tick_labels)

In [None]:
# Display the "sfr_tot_p50" and "z" columns of the cleaned table side by side.
gal_csv.loc[:, ["sfr_tot_p50", "z"]]

In [None]:
# Display the held-out target values returned by train_test_split.
ytest

In [None]:
# Display the model's predictions for the test split.
ypred