### Content-based Recommendation System

In [1]:
import pandas as pd
import numpy as np
pd.options.display.max_columns = 500

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

In [2]:
# Load the groups export (groups_austin.csv) into a DataFrame.
# NOTE(review): the path is absolute and looks like a Databricks DBFS mount, so
# this cell only runs where that file exists — confirm before running elsewhere.
groups = pd.read_csv("/dbfs/FileStore/tables/groups_austin.csv")

In [3]:
# Preview only the rows whose category is "food/drink".
groups.loc[groups["category.name"].eq("food/drink")]

Unnamed: 0,id,category.name,name,organizer.member_id,topics,description
38,272321,food/drink,Austin Wine Enthusiasts,1953748.0,"[{'urlkey': 'wine', 'name': 'Wine', 'id': 611}...",<p>Welcome to the Austin Wine Enthusiasts Meet...
39,366779,food/drink,Austin's Best - Craft Beer Group,245413651.0,"[{'urlkey': 'singles', 'name': 'Singles', 'id'...",<p>Meet with other local Craft Beer drinkers t...
40,404452,food/drink,"Austin Raw Food Diet, Vegan Potluck and Inform...",3618240.0,"[{'urlkey': 'vegetarian', 'name': 'Vegetarian'...",<p>Are you interested in meeting others who sh...
41,458982,food/drink,Central Texas Foodies,189660465.0,"[{'urlkey': 'wine', 'name': 'Wine', 'id': 611}...",<p>Getting together locally with other people ...
42,488712,food/drink,VegAustin,10775897.0,"[{'urlkey': 'vegetarian', 'name': 'Vegetarian'...",<p>Welcome to VegAustin!&nbsp;We welcome vegan...
43,648177,food/drink,Capital Confectioners Club,1042005.0,"[{'urlkey': 'cakedecorating', 'name': 'Cake De...","<p>Capital Confectioners, a greater Austin Cak..."
44,1255991,food/drink,Fearless Foodies,5183146.0,"[{'urlkey': 'diningout', 'name': 'Dining Out',...",<p>Meetup with other Austin area foodies who s...
45,1279432,food/drink,"Austin Eastside Cooking, Potlucks and Get-toge...",8643052.0,"[{'urlkey': 'weightloss', 'name': 'Weight Loss...",<p>Let's get together and share some good food...
46,1521037,food/drink,Austin Food and Wine Lovers,29885072.0,"[{'urlkey': 'wine', 'name': 'Wine', 'id': 611}...",<p>The Austin Food and Wine Lovers believe tha...
47,1550794,food/drink,VegAustin Potlucks,10775897.0,"[{'urlkey': 'environment', 'name': 'Environmen...",<p>We typically have two potlucks each month a...


In [4]:
# Keep only the columns used downstream. The explicit .copy() makes this an
# independent frame, so the later in-place assignment to groups["topics"] does
# not operate on a slice of the loaded frame (pandas' SettingWithCopyWarning).
groups = groups[["id","category.name","name","organizer.member_id","topics","description"]].copy()

In [5]:
# Number of rows currently in the groups frame.
len(groups)

In [6]:
import json
import yaml

# Each "topics" cell holds the string form of a list of dicts; parse it back
# into Python objects. yaml.safe_load replaces the bare yaml.load(x) call, which
# can construct arbitrary objects from its input and raises TypeError on
# PyYAML >= 6 (Loader became a required argument). safe_load only builds plain
# lists, dicts and scalars, which is what the sample rows contain.
groups["topics"] = groups["topics"].apply(yaml.safe_load)

# Sanity check: the first entry should now be a parsed object rather than a str.
type(groups["topics"][0])

In [7]:
# Reshape to one row per (group, topic slot):
#   1. apply(pd.Series) spreads each group's topic list over integer columns
#      (one topic dict per cell, NaN where a group has fewer topics);
#   2. merge on the index re-attaches the group-level columns;
#   3. the original list column "topics" is dropped;
#   4. melt stacks every column not named in id_vars into one "topics" column;
#   5. the helper "variable" column produced by melt is dropped.
# NOTE(review): "description" is not in id_vars, so melt treats it as a value
# column as well — its text ends up as extra rows in "topics", and after this
# cell the frame no longer has a "description" column.
groups = groups["topics"].apply(pd.Series) \
    .merge(groups, right_index = True, left_index = True) \
    .drop(["topics"], axis = 1) \
    .melt(id_vars = ['id', 'category.name','name','organizer.member_id'], value_name = "topics") \
    .drop("variable", axis = 1)

In [8]:
# Expand each topic dict into its own columns and join those columns back onto
# the group rows by index. Column names present on both sides receive pandas'
# default _x (topic side) / _y (group side) suffixes.
groups = (
    groups["topics"]
    .apply(pd.Series)
    .merge(groups, left_index=True, right_index=True)
)

In [9]:
# Keep the topic key plus the group-level fields; the *_y columns are the ones
# that came from the group side of the preceding merge.
groups = groups.loc[:, ["urlkey", "id_y", "category.name", "name_y", "organizer.member_id"]]

In [10]:
# Give the merged columns descriptive names.
groups = groups.rename(
    columns=dict(urlkey="topic_key", id_y="group_id", name_y="group_name")
)

In [11]:
# Preview the reshaped frame: one row per (topic_key, group).
groups.head()

Unnamed: 0,topic_key,group_id,category.name,group_name,organizer.member_id
0,classics,268001,fine arts/culture,The Austin Books and Chatter Group,2850525.0
1,cartoonists,1294488,fine arts/culture,Austin Life Drawing Meetup Group,7518535.0
2,artistsway,1633821,fine arts/culture,Austin Drink-n-Draw,11732501.0
3,musicians,1923631,fine arts/culture,Austin Arts Collective,7601460.0
4,urbanexplor,2295011,fine arts/culture,Urban Sketchers - Austin,13938061.0


In [12]:
# Discard rows that carry no topic key.
groups = groups[groups["topic_key"].notna()]

In [13]:
# Spot check: list every topic row recorded for group 268001.
groups.loc[groups["group_id"].eq(268001)]

Unnamed: 0,topic_key,group_id,category.name,group_name,organizer.member_id
0,classics,268001,fine arts/culture,The Austin Books and Chatter Group,2850525.0
2891,bookclub,268001,fine arts/culture,The Austin Books and Chatter Group,2850525.0
5782,intellectual-discussion,268001,fine arts/culture,The Austin Books and Chatter Group,2850525.0
8673,fiction,268001,fine arts/culture,The Austin Books and Chatter Group,2850525.0
11564,literature,268001,fine arts/culture,The Austin Books and Chatter Group,2850525.0
14455,reading,268001,fine arts/culture,The Austin Books and Chatter Group,2850525.0
17346,novel-reading,268001,fine arts/culture,The Austin Books and Chatter Group,2850525.0


In [14]:
# Cast group_id to a categorical first; per the original author, rows go
# missing from the pivoted table otherwise.
groups['group_id'] = groups['group_id'].astype('category')

# Binary group x topic utility matrix: a constant marker column is pivoted so
# that a cell holds 1 where the group lists the topic and 0 (fill_value) where
# it does not.
temp = groups.drop(columns = ['group_name', 'organizer.member_id']).assign(dummy = 1)
temp = temp.pivot_table(index = 'group_id', columns = ['topic_key'], values = 'dummy', fill_value = 0)

In [15]:
# Lookup frame with one row per group, keyed by group_id; used later to map a
# matrix position back to the group's name.
x = groups.drop_duplicates(subset = 'group_id').set_index('group_id')

# Attach each group's category to the topic matrix and one-hot encode it. The
# empty prefix and separator keep the column labels on a single flat level
# (bare category names next to the topic keys).
temp = temp.join(x['category.name']).pipe(pd.get_dummies, prefix = '', prefix_sep = '')

In [16]:
# Preview the per-group lookup frame.
x.head()

Unnamed: 0,topic_key,category.name,group_name,organizer.member_id
0,classics,fine arts/culture,The Austin Books and Chatter Group,2850525.0
1,cartoonists,fine arts/culture,Austin Life Drawing Meetup Group,7518535.0
2,artistsway,fine arts/culture,Austin Drink-n-Draw,11732501.0
3,musicians,fine arts/culture,Austin Arts Collective,7601460.0
4,urbanexplor,fine arts/culture,Urban Sketchers - Austin,13938061.0


In [17]:
# Preview the feature matrix: one row per group_id, one 0/1 column per topic
# key followed by the one-hot category columns.
temp.head()

Unnamed: 0_level_0,1031-real-estate-exchange,10th-amendment,10th-amendment-state-sovereignty,12-step-alternatives,13th-age,15-minute-female-orgasm,2-meter-ham-radio-operators,20-somethings,20s,20s-30s-40s-social-group,20s-30s-social,20s-and-30s,20s-social,24-7-customer-service-support,2600,2d-animation,2nd-american-revolution,30-to-40s-group,30s,30s-40s-social,30s-and-40s,30s-and-40s-fun-loving-desis,3d,3d-animation,3d-artists,3d-gaming,3d-modeling,3d-printing,3dr,4-hour-work-week,4-wheeling-and-camping,4-x-4,4-x-4-trail-riding,40-and-older,40s-and-50s-social-community,420,420-friends,4square,4x4s,5-0-mustangs,5000-year-leap-book-discussions,80s-dancing,80s-movies,80s-music,80s-pop,80s-rock-concerts,80s-rock-music,80smusic,Euro-games,Lesbian,Wakesurfing,_type_-theory,a-capella,a-course-in-miracles-acim,a-course-in-miracles-life,a-course-in-miracles-study-and-sharing-group,a-course-in-miracles-study-group,a-course-in-miracles-thursday-sunday-study-group,a-social-group-for-hiv-positive-people,a-spiritual-path-to-higher-creativity,a11y,abrahamhicks,abstract-art,abundance-and-prosperity,abundance-joy-happiness,abusesurvivors,ac-christians,acapella,acapella-group,acappella,acappella-singing,accelerated-learning-learning-how-to-learn,acceptance,accepting-and-celebrating-a-childfree-life,accessibility,account-based-marketing,accountant,accounting,accounting-saas,accounting-service-consultant,accounting-students,accountsoftware,ace,achieving-your-goals,acim,acim-disappearance-of-the-universe,acim-group,acing-job-interviews-and-job-interview-practice,acoustic-guitar,acoustic-guitar-teachers,acoustic-music,acquired-neuropathy,acro-balancing,acro-yoga,acrobatics,acroyoga,acting,action-sports,active-lifestyle,active-medical-marijuana-patients,active-parenting,active-parents,active-seniors,activedogs,activism,activity-group,activity-monitors-and-devices,acts-of-kindness,acumatica,acupuncture,acustical-music-music-jam-sessions-vinyl-records,addiction,additive-manufacturing,adhd,a
dhd-support,adminasst,administrative-professionals,adobe,adobe-aem,adobe-cq,adobe-cq5,adobe-creative-suite,adobe-experience-manager,adobe-illustrator,adobe-photoshop,adobe-user-group,adoptee-reunion-support,adoptee-support,adoptees,adoptees-looking-for-reunion,adoption,adoption-reunion-search,adp,adrenal-fatigue,adult-adoptees,adult-amateur-baseball,adult-aspies,adult-baseball,adult-halloween-activities,adult-indigo-crystal-and-rainbow-children,adult-music-lessons,adult-paper-craft-classes,adult-self-defense,adult-social-sports,adult-sports-league,adult-survivors-of-child-abuse,adult-teens-with-asperger-spectrum-disorders,adultindigo,adulting,adults-abused-as-children,adults-adhd-support-group,adults-taking-care-of-aging-parents,adults-with-add-adhd,adults-with-adhd,adults-with-aspergers-syndrome,adults-with-dyspraxia,adults-with-special-needs-disabilities,advaita-vedanta,advanced-algorithms-data-structure-design-pattern,advanced-financial-planning-tax-strategies,advanced-manufacturing-engineering,advanced-photography-workshops,advanced-scuba-diving,advanced-sql-server,adventure,adventure-hiking,adventure-travel,advertising,advertising-and-marketing-digital-and-print,advertising-and-pr,advocacy,adwords,adyashanti,aem,aerial,aerial-cinematography,aerial-performance-art,aerial-photography,aerial-silks,aerial-videography,affenpinscher,affiliate-marketing,affordable-housing,afghanhound,afghanistan-veterans,afh,africa-travel,africa-travel-adventure,african-american,african-american-authors,african-american-families,african-american-professionals,african-american-women,african-authors,african-dance,african-diaspora,african-drumming,african-food,african-men-and-women,african-professionals,african-singles,africanmusic,africans,after-business-hours-networking-meet-up-group,after-death-communications,afterlife,afterlife-and-reincarnation,afternoon-tea-tea-lovers,agda,agents,agile-and-scrum,agile-business-analysts,agile-coaching,agile-leadership,agile-marketing,agile-portfolio
-management,agile-program-management,agile-project-management,agile-testing,agile-transformation,aging-parent-caregivers,aging-parent-caretakers,agnostic,agnosticparents,agnostics,agorism,agriculture,ah-women-chorus-or-ah-women-choir,ai,aikido,aikido-self-defense-classes-private-group,aikido-spirituality,aikido-weapons-classes,airbnb,airbnb-host,airbrush-tattoo,aircraft,aircraft-owners,airplanes,akka,alcohol-and-drug-free,alcohol-free-activities,alcohol-moderation,alcoholics,alcoholics-anonymous-alternatives,alcoholism,algorithmic-trading,algorithms,algorithms-and-computational-theory,all-about-cats-and-kittens,...,women-over-50,women-over-60,women-owned-business,women-programmers,women-sacred-circle,women-self-empowerment-and-exploration,women-small-business-owners,women-social-networking-empowerment-mentor,women-software-developers,women-solo-travel,women-supporting-and-empowering-women,women-supporting-other-women,women-tech-makers,women-veterans,women-veterans-and-military-sexual-trauma,women-veterans-and-ptsd,women-veterans-and-their-families,women-veterans-community-outreach,women-veterans-empowerment,women-veterans-family-and-friends,women-veterans-of-all-military-conflict-eras,women-walking,women-wellness,women-who-code,women-who-like-beer,women-who-love-to-cuckold-their-men,women-who-love-to-travel,women-who-travel,women-with-anxiety-depression,womens-a-cappella-singing,womens-adventure,womens-barbershop,womens-book-club,womens-bookclub,womens-business-connections,womens-business-networking,womens-camping,womens-chorus,womens-circle,womens-circles,womens-climbing,womens-empowerment,womens-empowerment-circle,womens-energy-circle,womens-fitness,womens-fitness-and-social,womens-friendship-circle,womens-golf,womens-group-travel,womens-healing-circle,womens-healing-empowerment,womens-health,womens-health-and-wellness,womens-health-wealth,womens-mountain-biking,womens-movies,womens-networking,womens-rights,womens-rugby,womens-running,womens-self-defense,womens-se
lf-empowerment,womens-sexuality,womens-soccer,womens-social-30-40,womens-social-over-50,womens-support,womens-surfing,womens-wisdom-healing,womensports,woocommerce,wood-crafts,wood-sculpting,wood-turning,woodwork,word-games,wordpress,wordpress-customization,wordpress-developers,wordpress-for-business,wordpress-plugins,wordpress-seo,wordpress-small-business-web-sites,wordpress-themes,wordpress-users,wordpress-websites,work-and-spirituality,work-at-home-dads,work-at-home-ebay-sellers,work-at-home-parents,work-at-home-work-at-home-moms,work-from-home-mothers,work-hazards,work-life-balance,workathome,worker-ownership,working-abroad,working-dads,working-mom-attorneys,working-parents,workingmoms,workout,workout-bootcamp,workout-buddies,workplace-safety,workshop,workshop-facilitation,world-cinema,world-classical-music,world-peace,worldmusic,worldreligions,worship-gatherings,wounded-soldiers-and-disabled-veterans,wrestling,wrestling-grappling,write-workshop-and-finish-writing-a-screenplay,writer,writers-and-aspiring-writers,writers-and-writing,writers-critique-group,writersblock,writing,writing-and-publishing,writing-and-singing-songs,writing-critique,writing-for-blogs,writing-for-pleasure,writing-for-youth,writing-lyrics,writing-sex,writing-workshops,wta,wwc,wwdyer,xbox,xbox-one,xc-mountain-biking,xcode,xero-accounting-software,xp,yachting,yale-university,yamaha-motorcycles,yankees,yaoi,yarn,yarnswap,yii-framework,yin-yoga,yoga,yoga-asanas-and-pranayama,yoga-beginners,yoga-community,yoga-detox,yoga-for-athletes,yoga-for-beginners,yoga-for-men,yoga-for-seniors,yoga-in-the-park,yoga-outdoors,yoga-pranayama,yoga-stretching-and-laughter,yoga-workshops,young,young-adult,young-adults,young-adults-with-autism-social-activities,young-adults-with-special-needs,young-african-professionals,young-black-professionals,young-breast-cancer-survivors,young-business-professionals,young-couples,young-entrepreneur,young-indians,young-latino-professionals,young-married-couples,young-moms,young
-onset-parkinsons,young-parents,young-physicians,young-professional-networking,young-professional-singles,young-professional-women,young-professionals,young-widows-and-widowers,young-women,young-women-affected-by-breast-cancer,young-women-grieving-the-loss-of-their-mother,young-womens-book-club,youngdems,youngrepublicans,your-4-ecommerce-shipping-options,your-journey-to-self-love-is-always-evolving,your-thriving-life,youtube,youtube-for-businesses-entrepreneurs,youtube-marketing,youtube-partnership-program,yuri,zen-buddhism,zen-meditation,zen-practice,zend-framework,zenpoetry,zentai,zero-waste,zilker,zoho-crm,zombies,zoos,zouk,zouk-lambada,zumba,zumba-classes,zumba-events,LGBT,alternative lifestyle,book clubs,career/business,cars/motorcycles,community/environment,dancing,education/learning,fashion/beauty,fine arts/culture,fitness,food/drink,games,health/wellbeing,hobbies/crafts,language/ethnic identity,movements/politics,movies/film,music,new age/spirituality,outdoors/adventure,paranormal,parents/family,pets/animals,photography,religion/beliefs,sci-fi/fantasy,singles,socializing,sports/recreation,support,tech,writing
group_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 
100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1,Unnamed: 146_level_1,Unnamed: 147_level_1,Unnamed: 148_level_1,Unnamed: 149_level_1,Unnamed: 150_level_1,Unnamed: 151_level_1,Unnamed: 152_level_1,Unnamed: 153_level_1,Unnamed: 154_level_1,Unnamed: 155_level_1,Unnamed: 156_level_1,Unnamed: 157_level_1,Unnamed: 158_level_1,Unnamed: 159_level_1,Unnamed: 160_level_1,Unnamed: 161_level_1,Unnamed: 162_level_1,Unnamed: 163_level_1,Unnamed: 164_level_1,Unnamed: 165_level_1,Unnamed: 166_level_1,Unnamed: 167_level_1,Unnamed: 168_level_1,Unnamed: 169_level_1,Unnamed: 170_level_1,Unnamed: 171_level_1,Unnamed: 172_level_1,Unnamed: 173_level_1,Unnamed: 174_level_1,Unnamed: 175_level_1,Unnamed: 176_level_1,Unnamed: 177_level_1,Unnamed: 178_level_1,Unnamed: 179_level_1,Unnamed: 180_level_1,Unnamed: 181_level_1,Unnamed: 182_level_1,Unnamed: 183_level_1,Unnamed: 184_level_1,Unnamed: 185_level_1,Unnamed: 186_level_1,Unnamed: 187_level_1,Unnamed: 188_level_1,Unnamed: 189_level_1,Unnamed: 190_level_1,Unnamed: 191_level_1,Unnamed: 192_level_1,Unnamed: 193_level_1,Unnamed: 194_level_1,Unnamed: 
195_level_1,Unnamed: 196_level_1,Unnamed: 197_level_1,Unnamed: 198_level_1,Unnamed: 199_level_1,Unnamed: 200_level_1,Unnamed: 201_level_1,Unnamed: 202_level_1,Unnamed: 203_level_1,Unnamed: 204_level_1,Unnamed: 205_level_1,Unnamed: 206_level_1,Unnamed: 207_level_1,Unnamed: 208_level_1,Unnamed: 209_level_1,Unnamed: 210_level_1,Unnamed: 211_level_1,Unnamed: 212_level_1,Unnamed: 213_level_1,Unnamed: 214_level_1,Unnamed: 215_level_1,Unnamed: 216_level_1,Unnamed: 217_level_1,Unnamed: 218_level_1,Unnamed: 219_level_1,Unnamed: 220_level_1,Unnamed: 221_level_1,Unnamed: 222_level_1,Unnamed: 223_level_1,Unnamed: 224_level_1,Unnamed: 225_level_1,Unnamed: 226_level_1,Unnamed: 227_level_1,Unnamed: 228_level_1,Unnamed: 229_level_1,Unnamed: 230_level_1,Unnamed: 231_level_1,Unnamed: 232_level_1,Unnamed: 233_level_1,Unnamed: 234_level_1,Unnamed: 235_level_1,Unnamed: 236_level_1,Unnamed: 237_level_1,Unnamed: 238_level_1,Unnamed: 239_level_1,Unnamed: 240_level_1,Unnamed: 241_level_1,Unnamed: 242_level_1,Unnamed: 243_level_1,Unnamed: 244_level_1,Unnamed: 245_level_1,Unnamed: 246_level_1,Unnamed: 247_level_1,Unnamed: 248_level_1,Unnamed: 249_level_1,Unnamed: 250_level_1,Unnamed: 251_level_1,Unnamed: 252_level_1,Unnamed: 253_level_1,Unnamed: 254_level_1,Unnamed: 255_level_1,Unnamed: 256_level_1,Unnamed: 257_level_1,Unnamed: 258_level_1,Unnamed: 259_level_1,Unnamed: 260_level_1,Unnamed: 261_level_1,Unnamed: 262_level_1,Unnamed: 263_level_1,Unnamed: 264_level_1,Unnamed: 265_level_1,Unnamed: 266_level_1,Unnamed: 267_level_1,Unnamed: 268_level_1,Unnamed: 269_level_1,Unnamed: 270_level_1,Unnamed: 271_level_1,Unnamed: 272_level_1,Unnamed: 273_level_1,Unnamed: 274_level_1,Unnamed: 275_level_1,Unnamed: 276_level_1,Unnamed: 277_level_1,Unnamed: 278_level_1,Unnamed: 279_level_1,Unnamed: 280_level_1,Unnamed: 281_level_1,Unnamed: 282_level_1,Unnamed: 283_level_1,Unnamed: 284_level_1,Unnamed: 285_level_1,Unnamed: 286_level_1,Unnamed: 287_level_1,Unnamed: 288_level_1,Unnamed: 289_level_1,Unnamed: 
290_level_1,Unnamed: 291_level_1,Unnamed: 292_level_1,Unnamed: 293_level_1,Unnamed: 294_level_1,Unnamed: 295_level_1,Unnamed: 296_level_1,Unnamed: 297_level_1,Unnamed: 298_level_1,Unnamed: 299_level_1,Unnamed: 300_level_1,Unnamed: 301_level_1,Unnamed: 302_level_1,Unnamed: 303_level_1,Unnamed: 304_level_1,Unnamed: 305_level_1,Unnamed: 306_level_1,Unnamed: 307_level_1,Unnamed: 308_level_1,Unnamed: 309_level_1,Unnamed: 310_level_1,Unnamed: 311_level_1,Unnamed: 312_level_1,Unnamed: 313_level_1,Unnamed: 314_level_1,Unnamed: 315_level_1,Unnamed: 316_level_1,Unnamed: 317_level_1,Unnamed: 318_level_1,Unnamed: 319_level_1,Unnamed: 320_level_1,Unnamed: 321_level_1,Unnamed: 322_level_1,Unnamed: 323_level_1,Unnamed: 324_level_1,Unnamed: 325_level_1,Unnamed: 326_level_1,Unnamed: 327_level_1,Unnamed: 328_level_1,Unnamed: 329_level_1,Unnamed: 330_level_1,Unnamed: 331_level_1,Unnamed: 332_level_1,Unnamed: 333_level_1,Unnamed: 334_level_1,Unnamed: 335_level_1,Unnamed: 336_level_1,Unnamed: 337_level_1,Unnamed: 338_level_1,Unnamed: 339_level_1,Unnamed: 340_level_1,Unnamed: 341_level_1,Unnamed: 342_level_1,Unnamed: 343_level_1,Unnamed: 344_level_1,Unnamed: 345_level_1,Unnamed: 346_level_1,Unnamed: 347_level_1,Unnamed: 348_level_1,Unnamed: 349_level_1,Unnamed: 350_level_1,Unnamed: 351_level_1,Unnamed: 352_level_1,Unnamed: 353_level_1,Unnamed: 354_level_1,Unnamed: 355_level_1,Unnamed: 356_level_1,Unnamed: 357_level_1,Unnamed: 358_level_1,Unnamed: 359_level_1,Unnamed: 360_level_1,Unnamed: 361_level_1,Unnamed: 362_level_1,Unnamed: 363_level_1,Unnamed: 364_level_1,Unnamed: 365_level_1,Unnamed: 366_level_1,Unnamed: 367_level_1,Unnamed: 368_level_1,Unnamed: 369_level_1,Unnamed: 370_level_1,Unnamed: 371_level_1,Unnamed: 372_level_1,Unnamed: 373_level_1,Unnamed: 374_level_1,Unnamed: 375_level_1,Unnamed: 376_level_1,Unnamed: 377_level_1,Unnamed: 378_level_1,Unnamed: 379_level_1,Unnamed: 380_level_1,Unnamed: 381_level_1,Unnamed: 382_level_1,Unnamed: 383_level_1,Unnamed: 384_level_1,Unnamed: 
385_level_1,Unnamed: 386_level_1,Unnamed: 387_level_1,Unnamed: 388_level_1,Unnamed: 389_level_1,Unnamed: 390_level_1,Unnamed: 391_level_1,Unnamed: 392_level_1,Unnamed: 393_level_1,Unnamed: 394_level_1,Unnamed: 395_level_1,Unnamed: 396_level_1,Unnamed: 397_level_1,Unnamed: 398_level_1,Unnamed: 399_level_1,Unnamed: 400_level_1,Unnamed: 401_level_1,Unnamed: 402_level_1,Unnamed: 403_level_1,Unnamed: 404_level_1,Unnamed: 405_level_1,Unnamed: 406_level_1,Unnamed: 407_level_1,Unnamed: 408_level_1,Unnamed: 409_level_1,Unnamed: 410_level_1,Unnamed: 411_level_1,Unnamed: 412_level_1,Unnamed: 413_level_1,Unnamed: 414_level_1,Unnamed: 415_level_1,Unnamed: 416_level_1,Unnamed: 417_level_1,Unnamed: 418_level_1,Unnamed: 419_level_1,Unnamed: 420_level_1,Unnamed: 421_level_1,Unnamed: 422_level_1,Unnamed: 423_level_1,Unnamed: 424_level_1,Unnamed: 425_level_1,Unnamed: 426_level_1,Unnamed: 427_level_1,Unnamed: 428_level_1,Unnamed: 429_level_1,Unnamed: 430_level_1,Unnamed: 431_level_1,Unnamed: 432_level_1,Unnamed: 433_level_1,Unnamed: 434_level_1,Unnamed: 435_level_1,Unnamed: 436_level_1,Unnamed: 437_level_1,Unnamed: 438_level_1,Unnamed: 439_level_1,Unnamed: 440_level_1,Unnamed: 441_level_1,Unnamed: 442_level_1,Unnamed: 443_level_1,Unnamed: 444_level_1,Unnamed: 445_level_1,Unnamed: 446_level_1,Unnamed: 447_level_1,Unnamed: 448_level_1,Unnamed: 449_level_1,Unnamed: 450_level_1,Unnamed: 451_level_1,Unnamed: 452_level_1,Unnamed: 453_level_1,Unnamed: 454_level_1,Unnamed: 455_level_1,Unnamed: 456_level_1,Unnamed: 457_level_1,Unnamed: 458_level_1,Unnamed: 459_level_1,Unnamed: 460_level_1,Unnamed: 461_level_1,Unnamed: 462_level_1,Unnamed: 463_level_1,Unnamed: 464_level_1,Unnamed: 465_level_1,Unnamed: 466_level_1,Unnamed: 467_level_1,Unnamed: 468_level_1,Unnamed: 469_level_1,Unnamed: 470_level_1,Unnamed: 471_level_1,Unnamed: 472_level_1,Unnamed: 473_level_1,Unnamed: 474_level_1,Unnamed: 475_level_1,Unnamed: 476_level_1,Unnamed: 477_level_1,Unnamed: 478_level_1,Unnamed: 479_level_1,Unnamed: 
480_level_1,Unnamed: 481_level_1,Unnamed: 482_level_1,Unnamed: 483_level_1,Unnamed: 484_level_1,Unnamed: 485_level_1,Unnamed: 486_level_1,Unnamed: 487_level_1,Unnamed: 488_level_1,Unnamed: 489_level_1,Unnamed: 490_level_1,Unnamed: 491_level_1,Unnamed: 492_level_1,Unnamed: 493_level_1,Unnamed: 494_level_1,Unnamed: 495_level_1,Unnamed: 496_level_1,Unnamed: 497_level_1,Unnamed: 498_level_1,Unnamed: 499_level_1,Unnamed: 500_level_1,Unnamed: 501_level_1
10023,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0
32400,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0
39554,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
57662,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
67677,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [18]:
#compute the similarity between groups
# cosine_similarity(temp) compares every row of temp with every row of temp, so
# sim_scores[i][j] is the similarity between the i-th and j-th rows of temp.
from sklearn.metrics.pairwise import cosine_similarity
sim_scores = cosine_similarity(temp)

In [19]:
# sim_scores rows follow temp's row order (pivot_table sorts by group_id),
# whereas x is still in first-appearance order from drop_duplicates. Reorder x
# to match temp BEFORE discarding group_id, so that position i in x, y and
# sim_scores all refer to the same group. The guard keeps the cell safe to
# re-run: once the index has been reset there is no group_id left to align on.
if x.index.name == 'group_id':
    x = x.loc[list(temp.index)].reset_index(drop = True)

#created to retrieve idx from input group name of content base function:
#maps a group name to its row position in x (and therefore in sim_scores)
y = pd.Series(x.index, index = x['group_name'])

In [20]:
# Preview the first 100 entries of the group-name -> row-position lookup.
y.head(100)

In [21]:
def content_base(group_name, scores = sim_scores):
    """Return the five groups most similar to ``group_name``.

    Parameters
    ----------
    group_name : str
        Name of the query group; looked up in the module-level Series ``y``,
        which maps a group name to its row position.
    scores : 2-D array-like, optional
        Pairwise similarity matrix whose row/column positions correspond to the
        rows of the module-level frame ``x``. Defaults to ``sim_scores``.

    Returns
    -------
    list of tuple
        Up to five ``(group_name, similarity)`` pairs, most similar first. The
        query group itself is never included.

    Raises
    ------
    KeyError
        If ``group_name`` is not a label of ``y``.

    Notes
    -----
    Assumes group names are unique; a duplicated name makes ``y[group_name]``
    return several positions, which this function does not handle.
    """
    #grabbing the row position of the query group
    idx = y[group_name]

    #pair every group position with its similarity to the query, best first
    ranked = sorted(enumerate(scores[idx]), key = lambda p: p[1], reverse = True)

    #drop the query by position instead of assuming it is ranked first: another
    #group with an identical feature vector ties at the top and may sort ahead
    top = [(i, s) for i, s in ranked if i != idx][:5]

    #each name is built together with its own score; the previous version zipped
    #names taken from rank 1 onward with scores taken from rank 0 onward, so
    #every reported score was shifted by one place
    return [(x['group_name'].iloc[i], s) for i, s in top]

In [22]:
# Example query: groups most similar to "Austin Drink-n-Draw".
content_base('Austin Drink-n-Draw')

In [23]:
# Example query: groups most similar to "Open Architecture Austin".
content_base('Open Architecture Austin')

In [24]:
# Example query: groups most similar to "Austin Wine Tastings".
content_base('Austin Wine Tastings')

In [25]:
# TF-IDF over word 1- to 3-grams with English stop words removed.
# min_df=1 keeps every term, exactly like the previous min_df=0, but is a value
# scikit-learn accepts: an integer min_df is a document count, and releases with
# parameter validation require it to be >= 1.
tf = TfidfVectorizer(analyzer='word', ngram_range=(1, 3), min_df=1, stop_words='english')

In [26]:
# The earlier reshaping cells turn `groups` into one row per (group, topic) and
# remove its "id", "name" and "description" columns, so this section cannot read
# them from `groups` on a top-to-bottom run. Load the per-group text into its
# own frame instead. Missing descriptions become "" because TfidfVectorizer
# rejects NaN documents; the fresh RangeIndex keeps row positions aligned with
# the rows of the similarity matrix.
group_text = (
    pd.read_csv("/dbfs/FileStore/tables/groups_austin.csv")[["id", "name", "description"]]
    .fillna({"description": ""})
    .reset_index(drop = True)
)

# TfidfVectorizer L2-normalises each row by default, so the plain dot product
# computed by linear_kernel is the cosine similarity: 0 means no shared terms,
# 1 means identical term weights.
tfidf_matrix = tf.fit_transform(group_text['description'])
cosine_similarities = linear_kernel(tfidf_matrix, tfidf_matrix)

# results maps a group id to [(score, other_group_id), ...], ordered from most
# to least similar, with the group itself left out.
group_ids = group_text['id'].tolist()
results = {}
for pos, group_id in enumerate(group_ids):
    # argsort is ascending, so reverse it to rank from most to least similar
    similar_indices = cosine_similarities[pos].argsort()[::-1]

    # exclude the group itself by position rather than assuming it is ranked
    # first (an empty description scores 0 against itself and would not be)
    results[group_id] = [
        (cosine_similarities[pos][i], group_ids[i]) for i in similar_indices if i != pos
    ]


def item(id):
    """Return the name of the group whose id equals ``id``.

    Raises IndexError when no group has that id.
    """
    return group_text.loc[group_text['id'] == id, 'name'].tolist()[0]


def recommend(id, num):
    """Print the ``num`` groups whose descriptions are most similar to group ``id``.

    Parameters
    ----------
    id : int
        Id of the query group; must be a key of ``results``.
    num : int
        Number of recommendations to print. Zero or a negative value prints the
        "unable to recommend" message and no recommendations.

    Raises
    ------
    KeyError
        If ``id`` is not a key of ``results``.
    """
    if (num <= 0):
        print("Unable to recommend any group as you have not chosen the number of groups to be recommended")
    elif (num == 1):
        print("Recommending " + str(num) + " group similar to " + item(id))
    else:
        print("Recommending " + str(num) + " groups similar to " + item(id))

    print("----------------------------------------------------------")
    # clamp so that a negative num cannot slice from the end of the list
    recs = results[id][:max(num, 0)]
    for rec in recs:
        print("You may also like to visit events of the group: " + item(rec[1]) + " (score:" + str(rec[0]) + ")")


# first argument: id of the query group; second argument: how many similar
# groups to print
recommend(272321,5)

In [27]:
# Scratch array (with a repeated value, a fraction and a negative) used to
# check how numpy's argsort orders values.
test_list = np.array([1,3,4,5,6,8,1,.1,67,86,94,-17])

In [28]:
# argsort returns the positions that would sort the array in ascending order.
test_list.argsort()