# Loading data

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Load the star dataset and turn its two text columns into integer codes
all_data = pd.read_csv("../input/star-dataset/6 class csv.csv")

# pd.factorize returns (codes, uniques); only the codes are kept, so each
# distinct label is replaced by an integer in order of first appearance.
all_data = all_data.assign(
    **{
        name: pd.factorize(all_data[name])[0]
        for name in ("Star color", "Spectral Class")
    }
)

all_data.head()

In [None]:
# Summary statistics for each column. "Star color" and "Spectral Class" were
# replaced by integer codes when the data was loaded, so their statistics
# here describe the codes rather than the original labels.
all_data.describe()

# Feature analysis

In [None]:
# Pairwise scatter plots of every column, with points coloured by star type
sns.pairplot(data=all_data, hue="Star type")

In [None]:
# Pairwise Pearson correlation between all columns, drawn as an annotated heatmap
corr_matrix = all_data.corr(method="pearson")
sns.heatmap(data=corr_matrix, annot=True)

In [None]:
# Show just the column of correlations against the target, "Star type"
star_type_corr = corr_matrix.loc[:, "Star type"]
print(star_type_corr)

# Model

In [None]:
# Import model
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [None]:
# Prepare data
# Feature columns fed to the model (chosen as highly correlated with the target)
columns = [
    "Absolute magnitude(Mv)",
    "Radius(R/Ro)",
    "Luminosity(L/Lo)",
    "Spectral Class",
    "Temperature (K)",
]
y = all_data["Star type"]
X = all_data.loc[:, columns].values
# Hold out half of the rows for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.50, random_state=42
)

In [None]:
# Train model
# Seed the forest so that Restart & Run All reproduces the same fitted model
# (and therefore the same accuracy); 42 matches the seed used for the split.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Evaluation

In [None]:
# Evaluate model on the held-out test split
y_pred = model.predict(X_test)
# scikit-learn metrics take (y_true, y_pred) in that order. Accuracy itself is
# symmetric, so the printed number is unchanged, but the correct order matters
# as soon as an asymmetric metric (precision, recall, ...) is swapped in.
accuracy = accuracy_score(y_test, y_pred)
print(f"Model accuracy: {accuracy}")