In [None]:
!pip install boto3 joblib



In [None]:
from sklearn.datasets import load_breast_cancer
import pandas as pd

# Load the scikit-learn breast-cancer dataset and flatten it into one table:
# one column per feature, followed by a "target" column with the labels.
data = load_breast_cancer()
df = pd.DataFrame(data.data, columns=data.feature_names).assign(target=data.target)

# Write the table to a local CSV file, leaving out the index column.
df.to_csv("breast_cancer.csv", index=False)

In [None]:
import boto3

import os

# Connection settings come from the environment so that no secret is stored
# in (or committed with) the notebook.
# NOTE(review): when the key pair is unset these are None; boto3 is then
# expected to resolve credentials through its own lookup chain — confirm this
# matches how the notebook is deployed.
AWS_ACCESS_KEY = os.environ.get("AWS_ACCESS_KEY_ID")
AWS_SECRET_KEY = os.environ.get("AWS_SECRET_ACCESS_KEY")
REGION = os.environ.get("AWS_DEFAULT_REGION")

# The bucket name is not a secret; keep the original placeholder as a fallback.
BUCKET_NAME = os.environ.get("S3_BUCKET_NAME", "your-bucket-name")

# Object key used for the dataset inside the bucket.
FILE_NAME = "breast_cancer.csv"

In [None]:
# Build the S3 client from the notebook's connection settings.
client_options = {
    "aws_access_key_id": AWS_ACCESS_KEY,
    "aws_secret_access_key": AWS_SECRET_KEY,
    "region_name": REGION,
}
s3 = boto3.client("s3", **client_options)

# Upload the local CSV file to the bucket under the key FILE_NAME.
s3.upload_file(Filename="breast_cancer.csv", Bucket=BUCKET_NAME, Key=FILE_NAME)
print("✅ Uploaded breast_cancer.csv to S3 bucket:", BUCKET_NAME)

✅ Uploaded breast_cancer.csv to S3 bucket: day6ofmlandawsjourney


In [None]:
from io import StringIO

# Fetch the uploaded object and parse its body directly as CSV.
obj = s3.get_object(Key=FILE_NAME, Bucket=BUCKET_NAME)
csv_stream = obj["Body"]
df_s3 = pd.read_csv(csv_stream)

# Confirmation message followed by a preview of the first rows.
print("✅ Loaded dataset from S3", df_s3.head(), sep="\n")

✅ Loaded dataset from S3
   mean radius  mean texture  mean perimeter  mean area  mean smoothness  \
0        17.99         10.38          122.80     1001.0          0.11840   
1        20.57         17.77          132.90     1326.0          0.08474   
2        19.69         21.25          130.00     1203.0          0.10960   
3        11.42         20.38           77.58      386.1          0.14250   
4        20.29         14.34          135.10     1297.0          0.10030   

   mean compactness  mean concavity  mean concave points  mean symmetry  \
0           0.27760          0.3001              0.14710         0.2419   
1           0.07864          0.0869              0.07017         0.1812   
2           0.15990          0.1974              0.12790         0.2069   
3           0.28390          0.2414              0.10520         0.2597   
4           0.13280          0.1980              0.10430         0.1809   

   mean fractal dimension  ...  worst texture  worst perimeter  wor

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import numpy as np

In [None]:
# Derived features, appended to df_s3 in this order.

# Ratio of mean radius to mean texture; the small constant is added to the
# denominator before dividing.
df_s3["radius_texture_ratio"] = df_s3["mean radius"].div(df_s3["mean texture"].add(1e-5))

# NOTE(review): this multiplies "mean area" by the "symmetry error" column,
# not "mean symmetry" — confirm that is the intended input.
df_s3["area_symmetry_product"] = df_s3["mean area"].mul(df_s3["symmetry error"])

# Natural log of (mean perimeter + 1).
df_s3["log_perimeter"] = np.log(df_s3["mean perimeter"].add(1))

In [None]:
# y is the label column; X is every remaining column of df_s3.
y = df_s3["target"]
X = df_s3.drop(columns="target")

In [None]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Random forest with 100 trees and a fixed seed, fitted on the training split.
rf = RandomForestClassifier(random_state=42, n_estimators=100)
rf.fit(X=X_train, y=y_train)

In [None]:
# Predict on the held-out rows and report the accuracy against the true labels.
y_pred = rf.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", test_accuracy)

Accuracy: 0.9649122807017544


In [None]:
import joblib
import io

In [None]:
# Serialize the fitted model into an in-memory buffer instead of a local file.
buffer = io.BytesIO()
joblib.dump(rf, buffer)
buffer.seek(0)

# Upload the buffer's full contents as a single S3 object.
# NOTE(review): joblib files are pickle-based — only load this artifact back
# from a bucket you trust.
model_bytes = buffer.getvalue()
s3.put_object(Body=model_bytes, Key="breast_cancer_model.pkl", Bucket=BUCKET_NAME)
print("✅ Model saved to S3 as breast_cancer_model.pkl")

✅ Model saved to S3 as breast_cancer_model.pkl
