In [1]:
import matplotlib.pyplot as plt
from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.manifold import TSNE
import numpy as np
import pandas as pd

In [2]:
stardf = pd.read_csv("big_clean_stars.csv")
stardf

Unnamed: 0,Vmag,Plx,e_Plx,B-V,SpType
0,9.10,3.54,1.39,0.482,F5
1,9.27,21.90,3.10,0.999,K3V
2,6.61,2.81,0.63,-0.019,B9
3,8.06,7.75,0.97,0.370,F0V
4,8.55,2.87,1.11,0.902,G8III
...,...,...,...,...,...
96737,7.92,3.93,1.24,0.133,Am...
96738,8.72,3.07,0.87,0.097,B3
96739,8.08,1.07,0.68,1.094,G5
96740,6.98,2.97,0.76,-0.143,B1.5V


In [3]:
stardf=stardf[stardf.Plx !=0]
stardf

Unnamed: 0,Vmag,Plx,e_Plx,B-V,SpType
0,9.10,3.54,1.39,0.482,F5
1,9.27,21.90,3.10,0.999,K3V
2,6.61,2.81,0.63,-0.019,B9
3,8.06,7.75,0.97,0.370,F0V
4,8.55,2.87,1.11,0.902,G8III
...,...,...,...,...,...
96737,7.92,3.93,1.24,0.133,Am...
96738,8.72,3.07,0.87,0.097,B3
96739,8.08,1.07,0.68,1.094,G5
96740,6.98,2.97,0.76,-0.143,B1.5V


In [4]:
stardf = stardf.reset_index(drop=True)

In [5]:
stardf

Unnamed: 0,Vmag,Plx,e_Plx,B-V,SpType
0,9.10,3.54,1.39,0.482,F5
1,9.27,21.90,3.10,0.999,K3V
2,6.61,2.81,0.63,-0.019,B9
3,8.06,7.75,0.97,0.370,F0V
4,8.55,2.87,1.11,0.902,G8III
...,...,...,...,...,...
96702,7.92,3.93,1.24,0.133,Am...
96703,8.72,3.07,0.87,0.097,B3
96704,8.08,1.07,0.68,1.094,G5
96705,6.98,2.97,0.76,-0.143,B1.5V


In [6]:
stardf["Amag"] = stardf["Vmag"] + 5* (np.log10(abs(stardf["Plx"]))+1)

In [7]:
stardf

Unnamed: 0,Vmag,Plx,e_Plx,B-V,SpType,Amag
0,9.10,3.54,1.39,0.482,F5,16.845016
1,9.27,21.90,3.10,0.999,K3V,20.972221
2,6.61,2.81,0.63,-0.019,B9,13.853532
3,8.06,7.75,0.97,0.370,F0V,17.506509
4,8.55,2.87,1.11,0.902,G8III,15.839409
...,...,...,...,...,...,...
96702,7.92,3.93,1.24,0.133,Am...,15.891963
96703,8.72,3.07,0.87,0.097,B3,16.155692
96704,8.08,1.07,0.68,1.094,G5,13.226919
96705,6.98,2.97,0.76,-0.143,B1.5V,14.343782


In [8]:
stardf.info ()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 96707 entries, 0 to 96706
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Vmag    96707 non-null  float64
 1   Plx     96707 non-null  float64
 2   e_Plx   96707 non-null  float64
 3   B-V     96707 non-null  float64
 4   SpType  96707 non-null  object 
 5   Amag    96707 non-null  float64
dtypes: float64(5), object(1)
memory usage: 4.4+ MB


In [9]:
stardf['TargetClass'] = stardf['SpType']

In [10]:
for i in range(len(stardf['TargetClass'])):
    if "O" in stardf.loc[i,'TargetClass']: 
        stardf.loc[i,'TargetClass'] = 0 # O (Blue, 10 Lacerta)
    elif "B" in stardf.loc[i,'TargetClass']: 
        stardf.loc[i,'TargetClass'] = 1 # (Blue, Rigel)   
    elif "A" in stardf.loc[i,'TargetClass']: 
        stardf.loc[i,'TargetClass'] = 2 # (Blue, Sirius)
    elif "F" in stardf.loc[i,'TargetClass']: 
        stardf.loc[i,'TargetClass'] = 3 # (Blue,White Procyon)
    elif "G" in stardf.loc[i,'TargetClass']: 
        stardf.loc[i,'TargetClass'] = 4 # (White, yellow Sun)
    elif "K" in stardf.loc[i,'TargetClass']: 
        stardf.loc[i,'TargetClass'] = 5 # (Orange,Red Arcturus)
    elif "M" in stardf.loc[i,'TargetClass']: 
        stardf.loc[i,'TargetClass'] = 6 # (Red, Betelguese)
    else: 
        stardf.loc[i,'TargetClass'] = 7 # None- not in main spectral class

In [11]:
stardf['TargetClass'].value_counts ()

5    26544
3    21672
4    19497
2    15352
1     9144
6     3950
7      316
0      232
Name: TargetClass, dtype: int64

In [12]:
stardf

Unnamed: 0,Vmag,Plx,e_Plx,B-V,SpType,Amag,TargetClass
0,9.10,3.54,1.39,0.482,F5,16.845016,3
1,9.27,21.90,3.10,0.999,K3V,20.972221,5
2,6.61,2.81,0.63,-0.019,B9,13.853532,1
3,8.06,7.75,0.97,0.370,F0V,17.506509,3
4,8.55,2.87,1.11,0.902,G8III,15.839409,4
...,...,...,...,...,...,...,...
96702,7.92,3.93,1.24,0.133,Am...,15.891963,2
96703,8.72,3.07,0.87,0.097,B3,16.155692,1
96704,8.08,1.07,0.68,1.094,G5,13.226919,4
96705,6.98,2.97,0.76,-0.143,B1.5V,14.343782,1


In [14]:
stardf= stardf.drop('SpType', axis=1)
stardf

Unnamed: 0,Vmag,Plx,e_Plx,B-V,Amag,TargetClass
0,9.10,3.54,1.39,0.482,16.845016,3
1,9.27,21.90,3.10,0.999,20.972221,5
2,6.61,2.81,0.63,-0.019,13.853532,1
3,8.06,7.75,0.97,0.370,17.506509,3
4,8.55,2.87,1.11,0.902,15.839409,4
...,...,...,...,...,...,...
96702,7.92,3.93,1.24,0.133,15.891963,2
96703,8.72,3.07,0.87,0.097,16.155692,1
96704,8.08,1.07,0.68,1.094,13.226919,4
96705,6.98,2.97,0.76,-0.143,14.343782,1


In [15]:
stardf.to_csv("TG_stars.csv", index=False)