In [1]:
from sklearn.cluster import KMeans
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from matplotlib import pyplot as plt
%matplotlib inline
from sklearn.model_selection import train_test_split

In [2]:
df = pd.read_csv('train_final.csv')
df.head()


Unnamed: 0,city_name,activity_name,activity_description,activity_location,latitude,longitude,google_rating,category,families,couples,...,child-friendly,21+,cost,ada_accessibility,interactivity,atmosphere,estimated_duration,reservation_needed,rare_find,person_responses
0,Seattle,Blue Moon Tavern,A historic bar known for its eclectic atmosphe...,"712 NE 45th St, Seattle, WA 98105",47.6614,-122.319,4.4,Drinks,0,1,...,0,1,Medium,1,Self-guided,Bustling,2.0,0,1,0
1,Boston,Boston Brewery Tours,Explore Boston's thriving craft beer scene wit...,Various Locations,42.3601,-71.0589,4.7,Drinks,0,0,...,0,1,Medium,1,Guided,Exciting,3.0,1,1,0
2,Dallas,Cidercade Dallas,Arcade with over 140 games and 24 ciders on tap,2777 Irving Blvd.,32.8,-96.8475,4.8,Drinks,1,1,...,0,1,Low,1,Hands-on,Exciting,2.5,0,1,1
3,Dallas,Deep Ellum Brewing Co.,Brewery offering tours and tastings of craft b...,2823 St Louis St.,32.7829,-96.7833,4.5,Drinks,0,0,...,0,1,Medium,0,Guided,Bustling,1.5,0,0,0
4,Boston,Isabella’s,A cozy speakeasy bar with craft cocktails and ...,"1 Franklin St, Boston, MA 02110",42.3556,-71.0603,4.5,Drinks,0,1,...,0,1,Medium,1,Self-guided,Relaxing,1.5,0,0,0


In [3]:
from sklearn.model_selection import train_test_split
# Split the data into train set (80%) and test set (20%)
columns_to_keep = [ 'category', 'families', 'couples', 'solo', 'child-friendly', '21+', 'cost', 'ada_accessibility','interactivity', 'atmosphere', 'estimated_duration', 'reservation_needed', 'rare_find', 'person_responses']
full_data_set = df[columns_to_keep]
#train_set_combined, valid_set = train_test_split(full_data_set, test_size=0.2, random_state=41)
train_set, test_set = train_test_split(full_data_set, test_size=0.2, random_state=42)
print(train_set.shape)
#print(valid_set.shape)
print(test_set.shape)

(264, 14)
(66, 14)


In [4]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.impute import SimpleImputer

In [5]:
locations_train = train_set.drop("person_responses", axis=1) # drop labels for training set
train_labels = train_set["person_responses"].copy()
locations_test = test_set.drop("person_responses", axis=1) # drop labels for training set
test_labels = test_set["person_responses"].copy()

In [6]:
data_cat = full_data_set.select_dtypes(include='object')

In [7]:
cat_pipeline = Pipeline([
        ('imputer2', SimpleImputer(strategy="most_frequent")),
        ("encoder", OneHotEncoder(sparse_output=False,  handle_unknown="ignore")),
    ])

cat_attribs = list(data_cat)

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.impute import SimpleImputer

full_pipeline = ColumnTransformer([
        ("cat", cat_pipeline, cat_attribs),
    ])

train_prepared = full_pipeline.fit_transform(locations_train)
#validation_prepared = full_pipeline.transform(valid_set)
test_prepared = full_pipeline.transform(test_set)

In [8]:
# Logistic Regression Model

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

#load the following dataset

clf = LogisticRegression(max_iter=10000, random_state=0)
clf.fit(train_prepared, train_labels)

acc = accuracy_score(test_labels, clf.predict(test_prepared)) * 100
print(f"Logistic Regression model accuracy: {acc:.2f}%")

Logistic Regression model accuracy: 78.79%


In [9]:
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

# Create a pipeline with scaler and logistic regression
pipe = make_pipeline(StandardScaler(), LogisticRegression(max_iter=1000, solver='saga', tol=0.1))

# Create a parameter grid
param_grid = {
    'logisticregression__C': [0.1, 1, 10, 100],
    'logisticregression__penalty': ['l1', 'l2']
}

# Create GridSearchCV object
grid_search = GridSearchCV(pipe, param_grid=param_grid, cv=5)

# Fit the model
grid_search.fit(train_prepared, train_labels)

# Print best parameters and best score
print("Best Parameters:", grid_search.best_params_)
print("Best Score:", grid_search.best_score_)

Best Parameters: {'logisticregression__C': 100, 'logisticregression__penalty': 'l1'}
Best Score: 0.7197387518142235


In [10]:
cville_df = pd.read_csv('traindatahack - charlottesville data.csv')
cville_data_copy_df = df.copy()
cville_df.head()

Unnamed: 0,city_name,activity_name,activity_description,activity_location,latitude,longitude,google_rating,category,families,couples,...,child-friendly,21+,cost,ada_accessibility,interactivity,atmosphere,estimated_duration,reservation_needed,rare_find,person_responses
0,Charlottesville,Blue Ridge Parkway,"Scenic drive with overlooks, hiking trails, an...","Blue Ridge Parkway, VA",37.4333,-79.5,4.9,Park/Garden,1,1,...,0,1,Low,1,Self-guided,Relaxing,3.0,0,1,1
1,Charlottesville,Blue Ridge Tunnel Trail,Historic railroad tunnel turned into a hiking ...,"Afton, VA",38.0312,-78.4665,4.7,Park/Garden,1,1,...,0,1,Low,1,Self-guided,Relaxing,2.0,0,1,1
2,Charlottesville,Carter Mountain Orchard,Pick-your-own fruit orchard with stunning view...,"1435 Carters Mountain Trail, VA",37.9976,-78.4665,4.7,Food,1,1,...,0,1,Low,1,Self-guided,Relaxing,2.0,0,1,1
3,Charlottesville,Downtown Mall,"Pedestrian mall with shops, restaurants, and l...","E Main St, Charlottesville, VA",38.0306,-78.4812,4.6,Food,1,1,...,1,1,Low,1,Self-guided,Bustling,2.0,0,0,1
4,Charlottesville,IX Art Park,"Outdoor art park with murals, sculptures, and ...","522 2nd St SE, Charlottesville, VA",38.0296,-78.4784,4.5,Park/Garden,1,1,...,0,1,Low,1,Self-guided,Relaxing,1.5,0,1,1


In [11]:
columns_to_keep = [ 'category', 'families', 'couples', 'solo', 'child-friendly', '21+', 'cost', 'ada_accessibility','interactivity', 'atmosphere', 'estimated_duration', 'reservation_needed', 'rare_find', 'person_responses']
full_data_set = cville_df[columns_to_keep]
full_data_set.drop('person_responses', axis=1, inplace=True)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  full_data_set.drop('person_responses', axis=1, inplace=True)


In [12]:
cville_test = full_pipeline.transform(full_data_set)

In [13]:
predictions = clf.predict(cville_test)
predictions


array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1])

In [14]:
column_names = df.columns
column_names

Index(['city_name', 'activity_name', 'activity_description',
       'activity_location', 'latitude', 'longitude', 'google_rating',
       'category', 'families', 'couples', 'solo', 'child-friendly', '21+',
       'cost', 'ada_accessibility', 'interactivity', 'atmosphere',
       'estimated_duration', 'reservation_needed', 'rare_find',
       'person_responses'],
      dtype='object')

In [15]:
FINAL_df = pd.DataFrame(columns=column_names)


In [16]:
for x in range(len(predictions)):
  if predictions[x] == 1:
    row_df = pd.DataFrame(cville_data_copy_df.iloc[[x]])
    FINAL_df = pd.concat([FINAL_df, row_df], ignore_index=True)

  FINAL_df = pd.concat([FINAL_df, row_df], ignore_index=True)


In [17]:
FINAL_df.head()

Unnamed: 0,city_name,activity_name,activity_description,activity_location,latitude,longitude,google_rating,category,families,couples,...,child-friendly,21+,cost,ada_accessibility,interactivity,atmosphere,estimated_duration,reservation_needed,rare_find,person_responses
0,Seattle,Blue Moon Tavern,A historic bar known for its eclectic atmosphe...,"712 NE 45th St, Seattle, WA 98105",47.6614,-122.319,4.4,Drinks,0,1,...,0,1,Medium,1,Self-guided,Bustling,2.0,0,1,0
1,Boston,Boston Brewery Tours,Explore Boston's thriving craft beer scene wit...,Various Locations,42.3601,-71.0589,4.7,Drinks,0,0,...,0,1,Medium,1,Guided,Exciting,3.0,1,1,0
2,Dallas,Cidercade Dallas,Arcade with over 140 games and 24 ciders on tap,2777 Irving Blvd.,32.8,-96.8475,4.8,Drinks,1,1,...,0,1,Low,1,Hands-on,Exciting,2.5,0,1,1
3,Dallas,Deep Ellum Brewing Co.,Brewery offering tours and tastings of craft b...,2823 St Louis St.,32.7829,-96.7833,4.5,Drinks,0,0,...,0,1,Medium,0,Guided,Bustling,1.5,0,0,0
4,Boston,Isabella’s,A cozy speakeasy bar with craft cocktails and ...,"1 Franklin St, Boston, MA 02110",42.3556,-71.0603,4.5,Drinks,0,1,...,0,1,Medium,1,Self-guided,Relaxing,1.5,0,0,0


In [18]:
import json
json_records = json.loads(FINAL_df.to_json(orient='records'))

In [19]:
json_records
print(type(json_records))

<class 'list'>


In [20]:
# !pip install anvil-uplink
# uncomment and run the above line, then comment out and run whole file

In [21]:
#!pip install anvil-uplink
import anvil.server
anvil.server.connect("server_QKJVIENGBSKEVZAI6XH5O2Y6-R3CA5QSEVWVHQ654")
@anvil.server.callable
def get_charlottesville_activities2():
  return json_records

Connecting to wss://anvil.works/uplink
Anvil websocket open
Connected to "Default Environment" as SERVER


In [22]:
anvil.server.wait_forever()
# vist this site: https://twin-noisy-degree.anvil.app/ and select charlottesville, va

KeyboardInterrupt: 