In [1]:
import os
import pickle
import zipfile

import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

In [2]:
# Make sure the output directory for the pickled models exists. Re-running
# this cell is harmless: exist_ok=True means an existing directory is fine.
os.makedirs("models", exist_ok=True)

# Directory, relative to the notebook's working directory, that the
# loading cells join with each archive's file name.
folder_data = "data"

In [3]:
# Load the diabetes dataset straight from its zip archive. The compression
# is named explicitly; it matches what pandas infers from the ".zip" suffix.
diabetes_zip = os.path.join(folder_data, "diabetes.csv.zip")
diabetes = pd.read_csv(diabetes_zip, compression="zip")

# Quick sanity check on the number of rows and columns that were read.
print("Diabetes shape:", diabetes.shape)

FileNotFoundError: [Errno 2] No such file or directory: 'data\\diabetes.csv.zip'

In [None]:
# Load the heart dataset from the first ".csv" member of its zip archive.
heart_zip = os.path.join(folder_data, "heart.csv.zip")
with zipfile.ZipFile(heart_zip) as z:
    csv_members = [name for name in z.namelist() if name.endswith(".csv")]
    if not csv_members:
        # Fail with a descriptive message instead of a bare IndexError.
        raise FileNotFoundError(f"No .csv member found in {heart_zip}")
    csv_name = csv_members[0]
    # Open the member in its own context manager so the handle is closed
    # as soon as pandas has finished reading it.
    with z.open(csv_name) as csv_file:
        heart = pd.read_csv(csv_file)
print("Heart shape:", heart.shape)

In [None]:
# Load the cancer dataset from the first ".csv" member of its zip archive.
cancer_zip = os.path.join(folder_data, "cancer.csv.zip")
with zipfile.ZipFile(cancer_zip) as z:
    csv_members = [name for name in z.namelist() if name.endswith(".csv")]
    if not csv_members:
        # Fail with a descriptive message instead of a bare IndexError.
        raise FileNotFoundError(f"No .csv member found in {cancer_zip}")
    csv_name = csv_members[0]
    # Open the member in its own context manager so the handle is closed
    # as soon as pandas has finished reading it.
    with z.open(csv_name) as csv_file:
        cancer = pd.read_csv(csv_file)
print("Cancer shape:", cancer.shape)

# Drop the "id" and "Unnamed: 32" columns when present (errors="ignore"
# tolerates their absence), and replace spaces in column names with "_".
cancer = cancer.drop(columns=["id", "Unnamed: 32"], errors="ignore")
cancer.columns = cancer.columns.str.replace(" ", "_")

# Rows without a diagnosis cannot be used as training examples.
cancer = cancer.dropna(subset=["diagnosis"])

# Encode the label as 1 for "M" and 0 for "B". Series.map turns any other
# value into NaN, so check for that here rather than letting NaN labels
# reach model training.
diagnosis_codes = cancer["diagnosis"].map({"M": 1, "B": 0})
unexpected_labels = cancer.loc[diagnosis_codes.isna(), "diagnosis"].unique().tolist()
if unexpected_labels:
    raise ValueError(
        f"Unexpected diagnosis labels (expected 'M' or 'B'): {unexpected_labels}"
    )
cancer["diagnosis"] = diagnosis_codes

print("Cancer Shape (cleaned):", cancer.shape)
print("Cancer Columns:", cancer.columns.tolist())

In [None]:
def train_and_save_model(data, target_column, model_path, save_features=False,
                         random_state=42, features_path=None):
    """Train a random-forest classifier on ``data`` and pickle it to disk.

    The frame is split 80/20 into train and test sets, a
    ``RandomForestClassifier`` is fitted on the training part, its accuracy
    on the test part is printed, and the fitted model is pickled to
    ``model_path``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding the feature columns and the target column.
    target_column : str
        Name of the label column; every other column is used as a feature.
    model_path : str
        Destination file for the pickled model.
    save_features : bool, default False
        When true, the list of feature column names is pickled as well.
    random_state : int, default 42
        Seed used for both the train/test split and the forest, so that
        repeated runs give the same model and the same accuracy.
    features_path : str or None, default None
        Destination for the pickled feature list. ``None`` keeps the
        original location, ``models/cancer_features.pkl``.

    Returns
    -------
    float
        Accuracy of the fitted model on the held-out test split.
    """
    X = data.drop(columns=[target_column])
    y = data[target_column]

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=random_state
    )

    # Seed the forest too; without it every run trains a different model.
    model = RandomForestClassifier(random_state=random_state)
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    print(f"Model accuracy ({model_path}): {acc:.2f}")

    # Create the destination directory if needed, so the function does not
    # depend on an earlier cell having created it.
    model_dir = os.path.dirname(model_path)
    if model_dir:
        os.makedirs(model_dir, exist_ok=True)
    with open(model_path, "wb") as f:
        pickle.dump(model, f)
    print(f"✅ Model kaydedildi: {model_path}")

    if save_features:
        features = X.columns.tolist()
        if features_path is None:
            # Always write to the correct folder (the original fixed location).
            features_path = os.path.join("models", "cancer_features.pkl")
        features_dir = os.path.dirname(features_path)
        if features_dir:
            os.makedirs(features_dir, exist_ok=True)
        with open(features_path, "wb") as f:
            pickle.dump(features, f)
        print(f"✅ Cancer features kaydedildi: {features_path}")

    return acc


In [None]:
# Diabetes model: "Outcome" is the label column.
train_and_save_model(diabetes, "Outcome", "models/diabetes_model.pkl")

# Heart model: "target" is the label column.
train_and_save_model(heart, "target", "models/heart_model.pkl")

# Cancer model: "diagnosis" is the label column. The feature names are
# saved here as well.
train_and_save_model(
    cancer, "diagnosis", "models/cancer_model.pkl", save_features=True
)


In [None]:
# Smoke test: reload each pickled model and predict on the rows returned by
# head(), with the label column removed. Only unpickle files you trust.

# Diabetes
with open("models/diabetes_model.pkl", "rb") as model_file:
    diabetes_model = pickle.load(model_file)
X_sample_diabetes = diabetes.drop(columns=["Outcome"]).head()
print("Diyabet Tahminleri:", diabetes_model.predict(X_sample_diabetes))

# Heart
with open("models/heart_model.pkl", "rb") as model_file:
    heart_model = pickle.load(model_file)
X_sample_heart = heart.drop(columns=["target"]).head()
print("Heart Tahminleri:", heart_model.predict(X_sample_heart))

# Cancer
with open("models/cancer_model.pkl", "rb") as model_file:
    cancer_model = pickle.load(model_file)
X_sample_cancer = cancer.drop(columns=["diagnosis"]).head()
print("Cancer Tahminleri:", cancer_model.predict(X_sample_cancer))