In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

In [None]:
# Load the recipes dataset; the relative path is resolved against the
# notebook's working directory.
df = pd.read_csv(filepath_or_buffer='christmas_recipes.csv')

In [None]:
# Explore the dataset: preview rows, summary statistics, missing-value
# counts, the distribution of servings, and correlations between columns.
print(df.head())

print(df.describe())

print(df.isnull().sum())

# Drop missing values before plotting: the histogram bin edges cannot be
# computed when the data contains NaN.
plt.hist(df['servings'].dropna(), bins=10, color='skyblue', edgecolor='black')
plt.xlabel('Servings')
plt.ylabel('Frequency')
plt.title('Distribution of Servings')
plt.show()


# Correlate numeric columns only. Since pandas 2.0, DataFrame.corr() raises
# when the frame contains non-numeric columns instead of silently skipping
# them, so the text columns have to be filtered out explicitly.
correlation_matrix = df.select_dtypes(include='number').corr()
plt.imshow(correlation_matrix, cmap='viridis', interpolation='nearest')
plt.colorbar()
# Label both axes with the column names so each heatmap cell is identifiable.
column_labels = correlation_matrix.columns
plt.xticks(range(len(column_labels)), column_labels, rotation=45, ha='right')
plt.yticks(range(len(column_labels)), column_labels)
plt.title('Correlation Matrix')
plt.show()


In [None]:
# Array basics: materialise the 'ingredients' column as a NumPy array and
# record how many entries it holds.

ingredients_array = np.array(df['ingredients'])

# shape[0] is the size of the first axis, i.e. the same value len() reports.
array_length = ingredients_array.shape[0]

In [None]:
# Basic DataFrame operations: column selection, row filtering, sorting,
# and a grouped aggregate.

selected_columns = ['title', 'servings', 'ingredients']
selected_data = df.loc[:, selected_columns]

# Keep only the rows whose 'time' value is below 60.
filtered_data = df.loc[df['time'].lt(60)]

# Order rows from the largest 'servings' value to the smallest.
sorted_data = df.sort_values(by='servings', ascending=False)

# Mean 'servings' for each distinct 'title'.
grouped_data = df.groupby(by='title')['servings'].mean()


In [None]:
# Basic classification model: predict 'title' from 'time' and 'servings'.
# NOTE(review): if each recipe has its own title, most test labels never
# appear in the training split and accuracy will be close to zero -- confirm
# that 'title' is the intended target.

X = df[['time', 'servings']]
y = df['title']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


# Seed the tree as well as the split: features are randomly permuted at each
# split, so ties between equally good splits would otherwise be broken
# differently from run to run and the reported accuracy would not reproduce.
classifier = DecisionTreeClassifier(random_state=42)
classifier.fit(X_train, y_train)


y_pred = classifier.predict(X_test)


accuracy = accuracy_score(y_test, y_pred)
print('Accuracy:', accuracy)

In [None]:
# Basic regression model: fit a linear model on 'time' and 'servings'.

X = df[['time', 'servings']]
y = df['instructions']

# LinearRegression needs a numeric target. If 'instructions' is not numeric
# (presumably free text -- TODO confirm), regress on the length of the text
# in characters instead; a numeric column is used unchanged.
if not pd.api.types.is_numeric_dtype(y):
    y = y.fillna('').astype(str).str.len()

# LinearRegression rejects NaN, so keep only rows where both features and
# the target are present. With complete data this selects every row.
complete_rows = X.notna().all(axis=1) & y.notna()
X = X[complete_rows]
y = y[complete_rows]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

regressor = LinearRegression()
regressor.fit(X_train, y_train)


y_pred = regressor.predict(X_test)


mse = mean_squared_error(y_test, y_pred)
print('Mean Squared Error:', mse)