In [4]:
import pandas as pd
from pathlib import Path

# Build the processed dataset: merge every raw CSV, rename the source columns to
# the model's feature names, attach a placeholder target and write one CSV.

# Paths
# NOTE(review): the input directory is an absolute, machine-specific path while the
# output path further down is relative to the working directory — consider deriving
# both from one configurable base directory.
raw_path = Path("E:/IDE/Python/PythonProject/Rain/Rain-Prediction-Using-LSTM/data/raw")
# Sorted so the row order of the merged frame does not depend on the order in
# which the filesystem happens to list the directory.
csv_files = sorted(raw_path.glob("*.csv"))
if not csv_files:
    # Without this guard pd.concat([]) fails with "No objects to concatenate",
    # which says nothing about the real cause (wrong or empty input directory).
    raise FileNotFoundError(f"No CSV files found in {raw_path}")

# Read and merge
df_list = [pd.read_csv(file) for file in csv_files]
merged_df = pd.concat(df_list, ignore_index=True)

# Create a mapping from old -> new column names
# NOTE(review): "winddir" is only renamed here, not converted. The later cell in
# this notebook treats it as numeric degrees and maps it to compass labels, so
# WindGustDir produced by this cell presumably still holds degrees — confirm.
rename_map = {
    "tempmin": "MinTemp",
    "tempmax": "MaxTemp",
    "winddir": "WindGustDir",       # assuming winddir ~ WindGustDir
    "windgust": "WindGustSpeed",    # assuming windgust ~ WindGustSpeed
    "humidity": "Humidity",
    "sealevelpressure": "Pressure",
    "temp": "Temp"
}

# Keep only required features + target
features = ['MinTemp', 'MaxTemp', 'WindGustDir', 'WindGustSpeed', 'Humidity', 'Pressure', 'Temp']
target = 'RainTomorrow'

# Apply renaming
merged_df = merged_df.rename(columns=rename_map)

# Subset the dataset (raises KeyError if any feature column is absent)
final_df = merged_df[features].copy()

# For target, create a dummy column (since original dataset may not have RainTomorrow)
# Rule: precipprob > 50 → "Yes", otherwise "No". A missing precipprob compares as
# False and therefore also becomes "No".
# NOTE(review): the label is taken from the same row's precipprob; nothing here
# shifts it to the following day — confirm this is the intended target.
if 'precipprob' in merged_df.columns:
    # Vectorized comparison; final_df shares merged_df's index, so rows align.
    final_df[target] = merged_df['precipprob'].gt(50).map({True: "Yes", False: "No"})
else:
    final_df[target] = "No"  # fallback placeholder

# Save
output_path = Path("data/processed/final_dataset.csv")
output_path.parent.mkdir(parents=True, exist_ok=True)
final_df.to_csv(output_path, index=False)

print(f"Final dataset with features {features} and target '{target}' saved at {output_path}")


Final dataset with features ['MinTemp', 'MaxTemp', 'WindGustDir', 'WindGustSpeed', 'Humidity', 'Pressure', 'Temp'] and target 'RainTomorrow' saved at data\processed\final_dataset.csv


In [5]:
import pandas as pd
from pathlib import Path

# Function to convert degrees → compass direction
def deg_to_compass(deg):
    """Convert a bearing in degrees to one of eight compass labels.

    Each label covers a 45-degree sector centred on its own bearing ('N' is
    centred on 0, 'NE' on 45, and so on). A bearing that lies exactly on a
    sector boundary (22.5, 67.5, ...) is assigned to the clockwise-next sector.
    Values outside [0, 360) wrap around, so 360 maps to 'N' and -45 to 'NW'.

    Parameters
    ----------
    deg : int or float or None
        Bearing in degrees.

    Returns
    -------
    str or None
        One of 'N', 'NE', 'E', 'SE', 'S', 'SW', 'W', 'NW', or None when
        ``deg`` is missing (None or NaN).
    """
    # A missing reading must not abort a whole-column .apply(); report it as missing.
    if pd.isna(deg):
        return None
    dirs = ['N', 'NE', 'E', 'SE', 'S', 'SW', 'W', 'NW']
    # Built-in round() rounds halves to the nearest even integer, which made the
    # sector boundaries inconsistent (67.5 rounded up to 'E', 112.5 down to 'E').
    # Shifting by half a sector and flooring always sends a boundary value to the
    # next sector.
    ix = int((deg + 22.5) // 45) % 8
    return dirs[ix]

# Build the processed dataset: merge every raw CSV, keep the required columns
# under the model's feature names (wind direction converted from degrees to a
# compass label), derive the target and write one CSV.

# Paths
# NOTE(review): the input directory is an absolute, machine-specific path while the
# output path further down is relative to the working directory — consider deriving
# both from one configurable base directory.
raw_path = Path("E:/IDE/Python/PythonProject/Rain/Rain-Prediction-Using-LSTM/data/raw")
# Sorted so the row order of the merged frame does not depend on the order in
# which the filesystem happens to list the directory.
csv_files = sorted(raw_path.glob("*.csv"))
if not csv_files:
    # Without this guard pd.concat([]) fails with "No objects to concatenate",
    # which says nothing about the real cause (wrong or empty input directory).
    raise FileNotFoundError(f"No CSV files found in {raw_path}")

# Read and merge
df_list = [pd.read_csv(file) for file in csv_files]
merged_df = pd.concat(df_list, ignore_index=True)

# Report every missing source column in one error instead of failing on the
# first lookup below.
required_columns = [
    "tempmin", "tempmax", "winddir", "windgust",
    "humidity", "sealevelpressure", "temp", "precipprob",
]
missing_columns = [col for col in required_columns if col not in merged_df.columns]
if missing_columns:
    raise KeyError(f"Raw data is missing required column(s): {missing_columns}")

# Extract and rename only required columns
final_df = pd.DataFrame({
    "MinTemp": merged_df["tempmin"],
    "MaxTemp": merged_df["tempmax"],
    # convert degrees → direction; na_action="ignore" leaves missing readings
    # missing instead of handing NaN to the converter
    "WindGustDir": merged_df["winddir"].map(deg_to_compass, na_action="ignore"),
    "WindGustSpeed": merged_df["windgust"],
    "Humidity": merged_df["humidity"],
    "Pressure": merged_df["sealevelpressure"],
    "Temp": merged_df["temp"]
})

# Generate RainTomorrow column: precipprob > 50 → "Yes", otherwise "No".
# A missing precipprob compares as False and therefore also becomes "No".
# NOTE(review): the label is taken from the same row's precipprob; nothing here
# shifts it to the following day — confirm this is the intended target.
final_df["RainTomorrow"] = merged_df["precipprob"].apply(lambda x: "Yes" if x > 50 else "No")

# Save
output_path = Path("data/processed/final_dataset.csv")
output_path.parent.mkdir(parents=True, exist_ok=True)
final_df.to_csv(output_path, index=False)

print("✅ Final dataset saved:", output_path)


✅ Final dataset saved: data\processed\final_dataset.csv
