# Recommendation Engine for Events at the Burning Man Arts Festival

A simple (item-based) recommendation engine for events at the Burning Man Arts and Entertainment Festival. Unlike the events classifier which used a whole set of engineered features, in this case we're going to keep it simpler and only use TF-IDF generated features.

<a href='#recs'> Jump to the Recommendation Engine </a>

In [1]:
import pandas as pd;
import numpy as np;
import seaborn as sns;
import matplotlib.pyplot as plt;

import string, nltk, re, pprint

from functools import reduce
from tqdm import tqdm
from pylab import *;
from scipy import sparse
from random import random

from nltk.corpus   import stopwords
from nltk          import word_tokenize
from nltk.tokenize import RegexpTokenizer
from nltk.sentiment.vader import SentimentIntensityAnalyzer as SIA
from nltk.corpus import stopwords

from sklearn.feature_extraction.stop_words import ENGLISH_STOP_WORDS
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import OneHotEncoder, LabelEncoder, StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, mean_absolute_error, mean_squared_error;
from sklearn.model_selection import cross_val_score, train_test_split, GridSearchCV, RandomizedSearchCV

import lightgbm as lgb;

from wordcloud import WordCloud, STOPWORDS

eng_stopwords = set(stopwords.words("english"))

%matplotlib inline



# Construct Feature Engineered Dataset

In [85]:
events = pd.read_csv('raw_data/cleaned_up.csv')

# Only perform recommendations on a single year's data
events = events[events['Year'] == 2017].drop(['Year'], axis=1).reset_index(drop=True)

events_og = events.copy()

events.head()

Unnamed: 0,Description,Title,Hosted by Camp,Location,Type,Contact Email,URL,Located at Art,Sunday,Monday,Tuesday,Wednesday,Thursday,Friday,Saturday,Sunday2,Monday2
0,"Ever have a really bad idea? A really, really ...",Bad Idea Bar,Academy of Arts and Sciences,,Gathering/Party,,,,midnight – 11:45 p.m.,midnight – 11:45 p.m.,midnight – 11:45 p.m.,midnight – 11:45 p.m.,midnight – 11:45 p.m.,midnight – 11:45 p.m.,midnight – 11:45 p.m.,0,0
1,"21+ come in, have a cocktail made fresh. There...",Slow Burn Lounge,FANDANGO!,,Other,,,,midnight – noon,0,0,0,0,0,0,0,0
2,"Hookah, trading booth, curiosities, and cockta...",The Magic Lantern awaits you,Magic Lantern Society,citizenmeow,Gathering/Party,monicayuwu@gmail.com,,,midnight – 11:45 p.m.,midnight – 11:45 p.m.,midnight – 11:45 p.m.,midnight – 11:45 p.m.,midnight – 11:45 p.m.,midnight – 11:45 p.m.,midnight – 11:45 p.m.,midnight – 11:45 p.m.,0
3,Come by the Lesbian Lending Library to pick up...,Camp Beaverton's Lesbian-ish Lending Library,Beaverton,Camp Beaverton,Adult-oriented,campbeaverton@gmail.com,http://www.campbeaverton.org,,midnight – 11:45 p.m.,midnight – 11:45 p.m.,midnight – 11:45 p.m.,midnight – 11:45 p.m.,midnight – 11:45 p.m.,midnight – 11:45 p.m.,midnight – 11:45 p.m.,midnight – 11:45 p.m.,0
4,Come by Stag Camp and take the harrowing zipli...,Zipline!!,Stag Camp 11,8:15 & D,Other,,,,midnight – 11:45 p.m.,midnight – 11:45 p.m.,midnight – 11:45 p.m.,midnight – 11:45 p.m.,midnight – 11:45 p.m.,midnight – 11:45 p.m.,midnight – 11:45 p.m.,midnight – 11:45 p.m.,midnight – 11:45 p.m.


In [86]:
le = LabelEncoder()
le.fit(events['Type'].values)
labels = le.classes_
y = le.transform(events['Type'].values) 

events["Description"] = (events["Description"].map(str) + ' ' + events["Title"].map(str) + ' ' + 
                         events["Hosted by Camp"].map(str) + ' ' + events["Location"].map(str))
events = events.drop(['Title', 'Hosted by Camp', 'Location'], axis=1)

count_vect_desc  = CountVectorizer(stop_words='english', min_df=20,  ngram_range=(1, 3), analyzer='word')
X = count_vect_desc.fit_transform(events['Description'].values);
iX_desc  = X.shape[1]
print(X.shape)

feature_names = ['str('+name+')' for name in count_vect_desc.get_feature_names()]

['Adult-oriented' 'Care/Support' 'Class/Workshop' 'Fire' 'Food' 'Game'
 'Gathering/Party' 'Kid-friendly' 'Other' 'Parade' 'Performance'
 'Ritual/Ceremony']


In [104]:
from sklearn.feature_extraction.text import TfidfTransformer

transformer = TfidfTransformer()

X = transformer.fit_transform(X)

<a id='recs'></a>

# Construct Similarity Matrix

Passing normalized TF-IDF values through the linear kernel function is identical to the cosine similarity

In [106]:
from sklearn.metrics.pairwise import linear_kernel

cossim = linear_kernel(X)

df_cossim = pd.DataFrame(cossim, columns = np.arange(len(cossim)))

df_cossim.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,3486,3487,3488,3489,3490,3491,3492,3493,3494,3495
0,1.0,0.004261,0.0,0.0,0.0,0.010308,0.0,0.005108,0.00534,0.079137,...,0.005725,0.1233,0.033614,0.0,0.0,0.111989,0.006738,0.061478,0.003582,0.0
1,0.004261,1.0,0.016157,0.011114,0.022992,0.015744,0.034705,0.007802,0.028508,0.07411,...,0.108621,0.059232,0.006759,0.089974,0.0,0.025413,0.035969,0.0,0.005472,0.101826
2,0.0,0.016157,1.0,0.011058,0.08633,0.0,0.0,0.0,0.02025,0.084151,...,0.021707,0.008672,0.0,0.015106,0.065883,0.018051,0.025549,0.0,0.0,0.0
3,0.0,0.011114,0.011058,1.0,0.089611,0.0,0.010556,0.0,0.01393,0.052184,...,0.04998,0.005965,0.0,0.010391,0.033422,0.012417,0.017575,0.0,0.0,0.0
4,0.0,0.022992,0.08633,0.089611,1.0,0.0,0.048242,0.0,0.028817,0.213049,...,0.103396,0.012341,0.0,0.021497,0.069143,0.025688,0.036359,0.0,0.0,0.0


# Example 1

Say we pick some event, in this case, a Live Jazz event.

In [107]:
print(events_og.iloc[[213]][['Title', 'Hosted by Camp', 'Type', 'Description']].values[0])

['Jazz Jam session' 'Playa Jazz Cafe' 'Performance'
 'Playa Jazz Café house band concert followed Jam Session. Musicians and singers are invited to sit in with the band. Coffee, Jazz theme drinks, Live Jazz.']


We can take a look at the corresponding entry in the similarity matrix to identify the most similar events. The first entry is just the event itself.

In [108]:
df_cossim[213].reset_index(name='sim').sort_values('sim', ascending=False).head(5)

Unnamed: 0,index,sim
213,213,1.0
927,927,0.464242
704,704,0.437599
151,151,0.390288
3062,3062,0.3839


Let's take a look at the recommendations. Not too shabby, they all seem relevent.

In [147]:
print(events_og.iloc[[927]][['Title', 'Hosted by Camp', 'Type', 'Description']].values)

[['Bluegrass Onstage Jam Hour at Rootpile, come pick!' 'Rootpile'
  'Performance'
  'At Rootpile camp, the only Bluegrass music and hillbilly culture camp on the playa, we will have as part of our nightly show an onstage jam session at which people can get on stage and sit in if they are a Bluegrass musician or singer. Our house bands will start at 9 pm after the jam, and we will be serving pork bbq and pinto beans until we run out!!']]


In [148]:
print(events_og.iloc[[704]][['Title', 'Hosted by Camp', 'Type', 'Description']].values)

[['Live Band Karaoke' 'bEEcHARGE!!!' 'Performance'
  "Be the lead singer!  Join us for Year 3 of Live Band Karaoke on the Playa.  We'll supply the band, the words, and a microphone!"]]


In [149]:
print(events_og.iloc[[151]][['Title', 'Hosted by Camp', 'Type', 'Description']].values)

[['Drum Bass Workshop Part 1' 'Playa Jazz Cafe' 'Class/Workshop'
  'House Band Drummers and Bass players explain the roles of their instruments and techniques involved in Jazz improvisation. Refreshments for attendees.']]


# Example 2

Let's take a look at another example from the Public Library

In [114]:
print(events_og.iloc[[1852]][['Title', 'Hosted by Camp', 'Type', 'Description']].values[0])

['Book Group Smackdown' 'Black Rock Public Library' 'Other'
 'Tired of book groups where everyone reads the same book and discusses that one book in nice, polite language? Come to the BRPL Book Group, where everyone brings a different book (your favorite?!?) and argues that it is the best.\n\n\n\nYelling, cursing, logic and other forms of persuasion are encouraged, but not required.']


In [116]:
df_cossim[1852].reset_index(name='sim').sort_values('sim', ascending=False).head(5)

Unnamed: 0,index,sim
1852,1852,1.0
1435,1435,0.593094
334,334,0.470564
1647,1647,0.324589
1982,1982,0.318754


Sounds fun. I'll meet James Joyce at Burning Man?

In [151]:
print(events_og.iloc[[1435]][['Title', 'Hosted by Camp', 'Type', 'Description']].values)

[['"Author" Book Signing' 'Black Rock Public Library' 'Other'
  'For one hour only, you will have the opportunity to meet any author, dead or alive! Bring your own book, or check one out from our collection, and then bring it to the "Author" to get it signed and have a once-in-a-lifetime conversation with your literary hero.']]


In [152]:
print(events_og.iloc[[334]][['Title', 'Hosted by Camp', 'Type', 'Description']].values)

[['Comic Book Reading Room 2.0 with MIST' "Captain Pump's Raiders"
  'Care/Support'
  'Come inside our D20 dome to cool off, relax, and read some epic adventures with the finest in comic book reading material on all the playa.']]


In [153]:
print(events_og.iloc[[1647]][['Title', 'Hosted by Camp', 'Type', 'Description']].values)

[['Drunken Wizard Book Readings' 'Servants of the Secret Fire' 'Game'
  'Get drunk and read from the Secret Fire Library']]


# Joint Recommendations

If I have multiple events picked, I can multiply their similarity vectors together to get joint recommendations.

In [132]:
(df_cossim[213]*df_cossim[1852]).reset_index(name='sim').sort_values('sim', ascending=False).head(10)

Unnamed: 0,index,sim
297,297,0.018352
2381,2381,0.014031
334,334,0.012665
2509,2509,0.012265
1982,1982,0.011703
2310,2310,0.011653
8,8,0.009607
921,921,0.009523
1607,1607,0.009517
2390,2390,0.009167


Music AND reading? Perfect!

In [154]:
print(events_og.iloc[[297]][['Title', 'Hosted by Camp', 'Type', 'Description']].values)

[['Morning Wizard Music' 'Servants of the Secret Fire' 'Ritual/Ceremony'
  'Curated wizard music from the Secret Fire Sound Wizard.  Sit down, relax, and enjoy a book in our library.']]
