# Task 6: Data Analysis and Data Science with Python
## Part 1: Time Series Forecasting using ARIMA

In [2]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error

ModuleNotFoundError: No module named 'pandas'

In [None]:
# Read the sales CSV, parse the 'Date' column as datetimes, and index the rows by it
df = pd.read_csv('sales.csv', parse_dates=['Date']).set_index('Date')
# Last expression of the cell: preview the first rows
df.head()

In [None]:
# Chart the raw sales series together with its 7-observation rolling mean
sales_series = df['Sales']
rolling_mean = sales_series.rolling(window=7).mean()

plt.figure(figsize=(10, 5))
plt.plot(sales_series, label='Sales')
plt.plot(rolling_mean, label='7-day Moving Average', linestyle='--')
plt.xlabel('Date')
plt.ylabel('Sales')
plt.title('Sales Trend Over Time')
plt.grid(True)
plt.legend()
plt.show()

In [None]:
# Build an ARIMA model with order (2, 1, 2) on the 'Sales' column and estimate it
arima_order = (2, 1, 2)
model = ARIMA(df['Sales'], order=arima_order)
model_fit = model.fit()
# Last expression of the cell: show the fitted-model summary
model_fit.summary()

In [None]:
# Forecast next 15 days
forecast = model_fit.forecast(steps=15)
# Daily dates starting the day after the last index entry
future_dates = pd.date_range(start=df.index[-1] + pd.Timedelta(days=1), periods=15)
# Pass the forecast as a plain array: a Series would be aligned by label against
# `future_dates`, and any label mismatch (e.g. an integer forecast index) would
# fill the column with NaN instead of the predicted values.
forecast_df = pd.DataFrame({'Forecasted Sales': np.asarray(forecast)}, index=future_dates)
# Last expression of the cell: display the forecast table
forecast_df

In [None]:
# Overlay the forecast (dashed) on the historical sales curve
historical_sales = df['Sales']

plt.figure(figsize=(10, 5))
plt.plot(historical_sales, label='Historical Sales')
plt.plot(forecast_df, label='Forecasted Sales', linestyle='--')
plt.xlabel('Date')
plt.ylabel('Sales')
plt.title('Sales Forecast')
plt.grid(True)
plt.legend()
plt.show()

In [None]:
# Hold-out check: refit on all but the last 15 observations, forecast those 15, and score them
sales_history = df['Sales']
train, test = sales_history[:-15], sales_history[-15:]
model_eval = ARIMA(train, order=(2, 1, 2)).fit()
pred = model_eval.forecast(steps=15)

# RMSE is the square root of the mean squared error; MAPE is reported as a percentage
mse = mean_squared_error(test, pred)
rmse = np.sqrt(mse)
mape = 100 * mean_absolute_percentage_error(test, pred)
print(f'RMSE: {rmse:.2f}')
print(f'MAPE: {mape:.2f}%')

## Part 2: Heart Disease Prediction using Logistic Regression

In [None]:
# Import necessary libraries
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score

In [None]:
# Read the heart-disease table from CSV; `df` is rebound here to the new dataset
heart_csv = 'heart_disease.csv'
df = pd.read_csv(heart_csv)
# Last expression of the cell: preview the first rows
df.head()

In [None]:
# Separate the 'Heart_Disease' target from the remaining columns, then standardize the features
target_column = 'Heart_Disease'
y = df[target_column]
X = df.drop(columns=target_column)

# The scaler here is fit on every row of X
scaler = StandardScaler()
X_scaled = scaler.fit(X).transform(X)

In [None]:
# Train model
# Split the unscaled features first so that the scaling statistics are learned
# from the training rows only; fitting the scaler on all rows would leak
# test-set information into training. `X_scaled` is intentionally not used here.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Rebind `scaler` to the train-fitted instance and apply it to both partitions
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

model = LogisticRegression()
model.fit(X_train, y_train)

In [None]:
# Evaluate model: predict on the held-out split and print the standard classification metrics
y_pred = model.predict(X_test)
print('Confusion Matrix:')
print(confusion_matrix(y_test, y_pred))
# The leading "\n" is written as an escape sequence so the string literal stays on one line
print('\nClassification Report:')
print(classification_report(y_test, y_pred))
print(f'Accuracy: {accuracy_score(y_test, y_pred) * 100:.2f}%')