# In [None]:
from ucimlrepo import fetch_ucirepo
import pandas as pd
from sklearn.preprocessing import StandardScaler

## Function to load and prepare a dataset
def load_and_prepare_dataset(dataset_id):
    """Fetch a UCI repository dataset and return its features and target.

    Processing steps, in order:

    1. Fetch the dataset with ``fetch_ucirepo``.
    2. For dataset 183 only, drop a fixed list of columns that the
       original author considered non-predictive.
    3. Encode categorical columns with ``pd.get_dummies(drop_first=True)``.
    4. Fill remaining missing values with the per-column mean.
    5. Print the dataset's metadata and variable table.

    Args:
        dataset_id: ID of the dataset in the UCI ML repository.

    Returns:
        A ``(X, y)`` tuple where ``X`` is the prepared feature DataFrame
        and ``y`` is the target values flattened to one dimension.
    """
    repo_entry = fetch_ucirepo(id=dataset_id)
    features = repo_entry.data.features
    # ravel() yields a flat array instead of a column-shaped one.
    target = repo_entry.data.targets.values.ravel()

    if dataset_id == 183:
        # errors='ignore' means listed columns that are absent do not raise.
        features = features.drop(
            columns=['county', 'communityname', 'fold', 'state', 'community'],
            errors='ignore',
        )

    # Encode first, then impute: the means are taken over the encoded frame.
    encoded = pd.get_dummies(features, drop_first=True)
    column_means = encoded.mean()
    prepared = encoded.fillna(column_means)

    # Dump repository metadata so the run can be checked by eye.
    print(f"Metadata for dataset {dataset_id}:\n", repo_entry.metadata)
    print(f"Variable information for dataset {dataset_id}:\n", repo_entry.variables)
    return prepared, target

# IDs of the UCI datasets to process
dataset_ids = [165, 275, 183]

# Fetch and prepare every dataset, keyed by its ID -> (features, target)
datasets = {
    dataset_id: load_and_prepare_dataset(dataset_id)
    for dataset_id in dataset_ids
}


def _with_target(features, target):
    """Return *features* with *target* appended as a 'target' column."""
    return pd.concat([features, pd.DataFrame(target, columns=['target'])], axis=1)


# One scaler object is reused throughout: it is refit on the features and
# then on the target of each dataset, so after the loop it only holds the
# statistics of the last target it was fit on.
scaler = StandardScaler()

# Replace each stored (features, target) pair with its standardized version
for ds_id in dataset_ids:
    raw_features, raw_target = datasets[ds_id]
    scaled_features = pd.DataFrame(
        scaler.fit_transform(raw_features),
        columns=raw_features.columns,
    )
    # StandardScaler needs a 2-D input; flatten back to 1-D afterwards.
    scaled_target = scaler.fit_transform(raw_target.reshape(-1, 1)).ravel()
    datasets[ds_id] = (scaled_features, scaled_target)

# Write each standardized dataset (features + 'target' column) to its own CSV
for ds_id, (features, target) in datasets.items():
    _with_target(features, target).to_csv(f'dataset_{ds_id}.csv', index=False)

# Named handles for the individual standardized datasets
X_concrete, y_concrete = datasets[165]
X_bike, y_bike = datasets[275]
X_credit, y_credit = datasets[183]

# Show the first rows of each dataset as a quick sanity check
previews = (
    ("Concrete Compressive Strength Dataset:", X_concrete, y_concrete),
    ("\nBike Sharing Dataset:", X_bike, y_bike),
    ("\nCommunity and Crime Dataset:", X_credit, y_credit),
)
for heading, features, target in previews:
    print(heading)
    print(_with_target(features, target).head())
