In [None]:
# STEP 1: Upload the zip file
# The result of files.upload() is iterated below and its first key is used as
# the path of the archive to open.
from google.colab import files
uploaded = files.upload()  # Upload 'bank+marketing.zip'

# STEP 2: Extract the outer zip
import os
import zipfile

# An empty `uploaded` (no file chosen) would make next(iter(...)) raise a bare
# StopIteration; fail with an explicit, readable error instead.
if not uploaded:
    raise FileNotFoundError("No file was uploaded; expected 'bank+marketing.zip'.")

main_zip = next(iter(uploaded))  # only the first uploaded entry is used
outer_path = "extracted_data"

# Unpack the whole outer archive into `outer_path`.
with zipfile.ZipFile(main_zip, 'r') as zip_ref:
    zip_ref.extractall(outer_path)

print("‚úÖ Main ZIP extracted!")

# STEP 3: Extract nested zip inside outer zip
# Candidate archives are tried in list order; only the first one that exists
# under `outer_path` is unpacked into `extracted_csv_dir`.
nested_zips = ['bank.zip', 'bank-additional.zip']
extracted_csv_dir = "final_data"

_first_hit = next(
    (
        (name, path)
        for name, path in ((n, os.path.join(outer_path, n)) for n in nested_zips)
        if os.path.exists(path)
    ),
    None,
)
found = _first_hit is not None

# Guard clause: nothing to unpack means the later steps cannot run.
if not found:
    raise FileNotFoundError("‚ùå No nested zip found to extract!")

z, zip_path = _first_hit
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extracted_csv_dir)
print(f"‚úÖ Extracted nested zip: {z}")

# STEP 4: Locate and load 'bank-full.csv'
import glob
import os
import pandas as pd

# List the CSV files sitting directly inside the extraction directory.
csv_files = glob.glob(extracted_csv_dir + "/*.csv")
print("üìÑ Found CSV files:", csv_files)

# Compare the exact file name rather than searching for a substring of the
# path, so a similarly named file (e.g. 'old-bank-full.csv') is never selected.
csv_path = next(
    (f for f in csv_files if os.path.basename(f) == "bank-full.csv"),
    None,
)

if csv_path is None:
    raise FileNotFoundError("‚ùå 'bank-full.csv' not found!")

# Load CSV (the file is parsed with ';' as the field separator)
df = pd.read_csv(csv_path, sep=';')
print("‚úÖ Dataset Loaded Successfully!")

# STEP 5: Encode categorical columns
from sklearn.preprocessing import LabelEncoder

# Every object-dtype column of `df` is overwritten in place with the output of
# its own LabelEncoder. The fitted encoders are kept in `label_encoders`,
# keyed by column name, so the mapping of each column remains available.
categorical_cols = df.select_dtypes(include='object').columns
label_encoders = {name: LabelEncoder() for name in categorical_cols}

for name, encoder in label_encoders.items():
    df[name] = encoder.fit_transform(df[name])

print("‚úÖ Categorical features encoded.")

# STEP 6: Train-test split and model training
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report

# Features are every column except the target column "y".
X = df.drop("y", axis=1)
y = df["y"]

# Hold out 20% of the rows for testing. The split is stratified on the target
# so that train and test keep the same class proportions; with a minority
# class this small, an unstratified split makes the per-class metrics depend
# on how many positives happen to land in the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Fixed random_state keeps the fitted tree reproducible between runs.
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)

# STEP 7: Model evaluation
# Overall accuracy, followed by per-class precision / recall / F1.
print("\nüìä Accuracy:", accuracy_score(y_test, y_pred))
print("\nüìã Classification Report:\n", classification_report(y_test, y_pred))


Saving bank+marketing.zip to bank+marketing (1).zip
‚úÖ Main ZIP extracted!
‚úÖ Extracted nested zip: bank.zip
üìÑ Found CSV files: ['final_data/bank.csv', 'final_data/bank-full.csv']
‚úÖ Dataset Loaded Successfully!
‚úÖ Categorical features encoded.

üìä Accuracy: 0.8740462235983634

üìã Classification Report:
               precision    recall  f1-score   support

           0       0.93      0.93      0.93      7952
           1       0.48      0.48      0.48      1091

    accuracy                           0.87      9043
   macro avg       0.70      0.70      0.70      9043
weighted avg       0.87      0.87      0.87      9043

