In [6]:
# Gradient Boost 
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import r2_score

# Load the historical data
data = pd.read_csv("C:/Users/nigan/cleaned_wine_data.csv")

# Drop rows with NaN values in the features and target
data_cleaned = data.dropna()

# Define features and target
features = ['points', 'price']  
target = 'price' 

# Separate features (X) and (y)
X = data_cleaned[features]
y = data_cleaned[target]

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a Gradient Boosting Regressor model
model = GradientBoostingRegressor(n_estimators=100, random_state=42)

# Fit the model on the training data
model.fit(X_train, y_train)

# Predict the 'price' on the test data
y_pred = model.predict(X_test)

# Calculate R-squared value
r2 = r2_score(y_test, y_pred)

# Find the indices of the top 10 predictions
top_10_indices = np.argsort(y_pred)[:10]

print("Top 10 predicted wines with highest points and lowest price:")
for index in top_10_indices:
    predicted_wine = data_cleaned.iloc[index]
    print("Title:", predicted_wine['title'])
    print("Points:", predicted_wine['points'])
    print("Price:", predicted_wine['price'])
    print("Wine Type:", predicted_wine['wine_type'])
    print("--------")

print("R-squared:", r2)


Top 10 predicted wines with highest points and lowest price:
Title: Lava Cap 2013 Estate Bottled Cabernet Sauvignon (El Dorado)
Points: 90
Price: 26.0
Wine Type: red
--------
Title: Steven Kent 2010 Home Ranch Vineyard Cabernet Sauvignon (Livermore Valley)
Points: 87
Price: 65.0
Wine Type: red
--------
Title: Le Vigne 2014 Nikiara Red (Paso Robles)
Points: 88
Price: 45.0
Wine Type: red
--------
Title: Amador Cellars 2013 Cooper & Karmere Vineyards Barbera (Amador County)
Points: 90
Price: 32.0
Wine Type: red
--------
Title: La Crema 2015 Kelli Ann Vineyard Chardonnay (Russian River Valley)
Points: 91
Price: 40.0
Wine Type: white
--------
Title: Von Strasser 2012 Estate Cabernet Sauvignon (Diamond Mountain District)
Points: 90
Price: 80.0
Wine Type: red
--------
Title: Ded.Reckoning 2013 No. 12 Reserve Red (Red Mountain)
Points: 89
Price: 40.0
Wine Type: red
--------
Title: HandCraft 2014 Artisan Collection Chardonnay (California)
Points: 86
Price: 10.0
Wine Type: white
--------
Title: 

In [7]:
# Cat Boost 
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from catboost import CatBoostRegressor
from sklearn.metrics import r2_score

# Load the historical data
data = pd.read_csv("C:/Users/nigan/cleaned_wine_data.csv")

# Drop rows with NaN values in the features and target
data_cleaned = data.dropna()

# Define features and target
features = ['points', 'price']  
target = 'price' 

# Separate features (X) and (y)
X = data_cleaned[features]
y = data_cleaned[target]

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a CatBoost Regressor model
model = CatBoostRegressor(iterations=100, random_seed=42, verbose=0)

# Fit the model on the training data
model.fit(X_train, y_train)

# Predict the 'price' on the test data
y_pred = model.predict(X_test)

# Calculate R-squared value
r2 = r2_score(y_test, y_pred)

# Find the indices of the top 10 predictions
top_10_indices = np.argsort(y_pred)[:10]

print("Top 10 predicted wines with highest points and lowest price:")
for index in top_10_indices:
    predicted_wine = data_cleaned.iloc[index]
    print("Title:", predicted_wine['title'])
    print("Points:", predicted_wine['points'])
    print("Price:", predicted_wine['price'])
    print("Wine Type:", predicted_wine['wine_type'])
    print("--------")

print("R-squared:", r2)


Top 10 predicted wines with highest points and lowest price:
Title: Lava Cap 2013 Estate Bottled Cabernet Sauvignon (El Dorado)
Points: 90
Price: 26.0
Wine Type: red
--------
Title: Von Strasser 2012 Estate Cabernet Sauvignon (Diamond Mountain District)
Points: 90
Price: 80.0
Wine Type: red
--------
Title: Piña 2012 Buckeye Vineyard Cabernet Sauvignon (Howell Mountain)
Points: 92
Price: 85.0
Wine Type: red
--------
Title: Le Vigne 2014 Nikiara Red (Paso Robles)
Points: 88
Price: 45.0
Wine Type: red
--------
Title: La Crema 2015 Kelli Ann Vineyard Chardonnay (Russian River Valley)
Points: 91
Price: 40.0
Wine Type: white
--------
Title: Krutz 2013 Magnolia Inspiration Vineyard Chardonnay (Russian River Valley)
Points: 90
Price: 20.0
Wine Type: white
--------
Title: Borjón 2012 Seleccion Barbera (Amador County)
Points: 90
Price: 36.0
Wine Type: red
--------
Title: Amador Cellars 2013 Cooper & Karmere Vineyards Barbera (Amador County)
Points: 90
Price: 32.0
Wine Type: red
--------
Title: D