# ***Logistic Regression***
# Goal: classify whether an object in space is a star or a planet, using a dataset with features (such as brightness, radius, distance, mass, and temperature) and a binary target column (0 = planet, 1 = star).

In [None]:
import pandas as pd
import numpy as np

# Build a synthetic, labelled star/planet dataset and save it as
# "space_objects.csv". The legacy global NumPy seed makes the draws repeatable.
np.random.seed(42)
n_samples = 500_000  # 250k planets + 250k stars

# Per-class row counts. The star count is derived by subtraction so the two
# classes always add up to exactly n_samples; sizing both with n_samples//2
# would leave the per-row date array one element longer than the feature
# arrays whenever n_samples is odd, and the DataFrame construction would fail.
n_planets = n_samples // 2
n_stars = n_samples - n_planets

# NOTE: the physical features below are drawn from unbounded normal
# distributions, so some rows end up with non-physical negative values
# (e.g. negative mass or temperature). The distributions are left as they are
# so the generated file stays the same.

# --------------------------
# Planets (label=0)
# --------------------------
planet_brightness = np.random.normal(3.0, 2.0, n_planets)
planet_radius = np.random.normal(3.0, 1.5, n_planets)
planet_mass = np.random.normal(5.0, 3.0, n_planets)
planet_temp = np.random.normal(3000, 1500, n_planets)
planet_labels = np.zeros(n_planets)

# --------------------------
# Stars (label=1)
# --------------------------
star_brightness = np.random.normal(6.0, 2.5, n_stars)
star_radius = np.random.normal(7.0, 2.0, n_stars)
star_mass = np.random.normal(15.0, 6.0, n_stars)
star_temp = np.random.normal(6000, 2000, n_stars)
star_labels = np.ones(n_stars)

# --------------------------
# Combine (planet rows first, then star rows)
# --------------------------
brightness = np.concatenate([planet_brightness, star_brightness])
radius = np.concatenate([planet_radius, star_radius])
mass = np.concatenate([planet_mass, star_mass])
temperature = np.concatenate([planet_temp, star_temp])
labels = np.concatenate([planet_labels, star_labels])

# --------------------------
# Date of Observation
# --------------------------
# Random dates between start_date and end_date: a uniform fraction of the
# whole span is added to the start date, one value per row.
start_date = np.datetime64('2015-01-01')
end_date = np.datetime64('2025-01-01')
date_of_obs = start_date + (end_date - start_date) * np.random.rand(n_samples)

# --------------------------
# Distance from Earth in light-years
# --------------------------
# Planets closer, stars farther on average
distance_planets = np.random.uniform(0.1, 50, n_planets)  # planets: 0.1–50 ly
distance_stars = np.random.uniform(10, 5000, n_stars)     # stars: 10–5000 ly
distance = np.concatenate([distance_planets, distance_stars])

# --------------------------
# Create DataFrame
# --------------------------
data = pd.DataFrame({
    "brightness": brightness,
    "radius": radius,
    "mass": mass,
    "temperature": temperature,
    "date_of_observation": date_of_obs,
    "distance_from_earth": distance,
    "label": labels.astype(int)  # stored as integer 0/1 rather than float
})

# Shuffle dataset so the two classes are interleaved instead of stacked
data = data.sample(frac=1, random_state=42).reset_index(drop=True)

# Save CSV (without the index column)
data.to_csv("space_objects.csv", index=False)

print("✅ Generated space_objects.csv ")



✅ Generated space_objects.csv 


In [None]:
import pandas as pd

# Read the CSV written by the generation cell back into a DataFrame
data = pd.read_csv("space_objects.csv")

# Preview: take the first 10 rows and print them as plain text
first_rows = data.head(10)
print(first_rows)

   brightness    radius       mass   temperature date_of_observation  \
0    5.117922  6.140672   5.608289   3955.936332          2019-07-31   
1    5.726507  3.941012   6.455729   2398.436778          2019-09-23   
2    3.371354  2.916086   8.450476   4501.784354          2022-09-27   
3    4.067357  2.575323   4.666242   4681.506028          2017-09-21   
4    6.344610  9.735034  15.597008  10462.867988          2020-06-07   
5    2.996124  2.604806  -0.435316   6653.339679          2017-10-09   
6    3.338595  1.777956   5.366844   -335.178548          2015-09-15   
7    6.906976  6.914443  17.515065   8091.278010          2017-02-07   
8    2.142039  1.035154   4.279303   2942.548642          2018-11-27   
9    9.120397  7.282966   9.969434   5142.114609          2021-09-26   

   distance_from_earth  label  
0             6.506415      0  
1            29.603397      0  
2            48.987714      0  
3             6.598742      0  
4          1270.449607      1  
5            36

In [None]:
# Logistic Regression Classification of Stars vs Planets using CSV

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# -----------------------
# Load Dataset from CSV
# -----------------------
data = pd.read_csv("space_objects.csv")

# -----------------------
# Prepare Features
# -----------------------
# Only the four physical measurements are used as predictors; the date and
# distance columns are left out of the model.
feature_cols = ["brightness", "radius", "mass", "temperature"]
X = data.loc[:, feature_cols].to_numpy()
y = data["label"].to_numpy()

# -----------------------
# Split Train/Test
# -----------------------
# 80% of the rows for training, 20% held out; fixed seed for a repeatable split
split_parts = train_test_split(X, y, random_state=42, test_size=0.2)
X_train, X_test, y_train, y_test = split_parts

# -----------------------
# Feature Scaling
# -----------------------
# The scaler is fitted on the training rows only and then applied to both
# partitions, so no test-set statistics enter the scaling.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# -----------------------
# Logistic Regression Model
# -----------------------
model = LogisticRegression(solver="liblinear", max_iter=1000)
model.fit(X_train_scaled, y_train)

# -----------------------
# Predictions
# -----------------------
# Predicted labels for the held-out rows
y_pred = model.predict(X_test_scaled)


# -----------------------
# Test with a new object
# -----------------------
# Example object: brightness=6, radius=9, mass=15, temperature=10000
# (values listed in the same order as feature_cols)
new_object = np.array([[6, 9, 15, 10000]])
# Apply the same scaling as the training data before predicting
new_object_scaled = scaler.transform(new_object)
prediction = model.predict(new_object_scaled)

# Label 1 means star, anything else is reported as a planet
is_star = prediction[0] == 1
print(
    "The object is classified as a STAR 🌟"
    if is_star
    else "The object is classified as a PLANET 🪐"
)



The object is classified as a STAR 🌟


In [None]:
# -----------------------
# Evaluation
# -----------------------
# Compare the held-out labels with the predictions using three metrics.
# Each metric is computed and printed in turn, under its own heading.
evaluation_steps = (
    ("Accuracy:", accuracy_score),
    ("\nClassification Report:\n", classification_report),
    ("\nConfusion Matrix:\n", confusion_matrix),
)
for heading, metric_fn in evaluation_steps:
    print(heading, metric_fn(y_test, y_pred))


Accuracy: 0.97342

Classification Report:
               precision    recall  f1-score   support

           0       0.97      0.98      0.97     49900
           1       0.98      0.97      0.97     50100

    accuracy                           0.97    100000
   macro avg       0.97      0.97      0.97    100000
weighted avg       0.97      0.97      0.97    100000


Confusion Matrix:
 [[48763  1137]
 [ 1521 48579]]
