In [1]:
import pandas as pd
import numpy as np
import googlemaps
import string
from pymystem3 import Mystem

In [2]:
gmaps = googlemaps.Client(key='TOKEN')

## Места

In [3]:
location = '55.799210,49.120476'
queries = ['парк развлечений',
         'развлечения',
         'океанариум',
         'дельфинариум',
         'боулинг',
         'библиотека',
         'кино',
         'кинотеатр',
         'кафе',
         #'церковь',
         #'монастырь',
         'ночной клуб',
         'клуб',
         'парк',
         'сквер',
         'достопримечательности',
         'музей',
         'ресторан',
         'зоопарк',
         'галерея',
         'выставка',
         'концерт',
         'бар',
         'фастфуд',
        ]

In [4]:
df = pd.DataFrame(columns=['place_id',
                           'name',
                           'location_lat',
                           'location_lng',
                           'types',
                           'rating',
                           'total_reviews', 
                           'address'])

In [5]:
for query in queries:
    page_token = ''
    
    while True:
        try:
            batch = gmaps.places(query=query, location=location, radius=8000, language='ru', page_token=page_token)
        except:
            break
        try:
            page_token = batch['next_page_token']
        except KeyError:
            break
            
        for place in batch['results']:
            place_id = place['place_id']
            if df['place_id'].isin([place_id]).any():
                continue
            name = place['name']
            location_lat = place['geometry']['location']['lat']
            location_lng = place['geometry']['location']['lng']
            types = ','.join(place['types'])
            try:
                rating = place['rating']
                total_reviews = place['user_ratings_total']
            except KeyError:
                rating = None
                total_reviews = None

            address = place['formatted_address']

            row = {'place_id': place_id,
                   'name': name,
                   'location_lat': location_lat,
                   'location_lng': location_lng,
                   'types': types,
                   'rating': rating,
                   'total_reviews': total_reviews, 
                   'address': address}

            df = df.append(row, ignore_index=True)

In [6]:
df['is_amusement_park'] = df['types'].apply(lambda x: 1 if 'amusement_park' in x else 0)
df['is_aquarium'] = df['types'].apply(lambda x: 1 if 'aquarium' in x else 0)
df['is_art_gallery'] = df['types'].apply(lambda x: 1 if 'art_gallery' in x else 0)
df['is_bowling_alley'] = df['types'].apply(lambda x: 1 if 'bowling_alley' in x else 0)
df['is_library'] = df['types'].apply(lambda x: 1 if 'library' in x else 0)
df['is_movie_theater'] = df['types'].apply(lambda x: 1 if 'movie_theater' in x else 0)
df['is_mosque'] = df['types'].apply(lambda x: 1 if 'mosque' in x else 0)
df['is_cafe'] = df['types'].apply(lambda x: 1 if 'cafe' in x else 0)
df['is_church'] = df['types'].apply(lambda x: 1 if 'church' in x else 0)
df['is_night_club'] = df['types'].apply(lambda x: 1 if 'night_club' in x else 0)
df['is_park'] = df['types'].apply(lambda x: 1 if 'park' in x else 0)
df['is_point_of_interest'] = df['types'].apply(lambda x: 1 if 'point_of_interest' in x else 0)
df['is_museum'] = df['types'].apply(lambda x: 1 if 'museum' in x else 0)
df['is_restaurant'] = df['types'].apply(lambda x: 1 if 'restaurant' in x else 0)
df['is_zoo'] = df['types'].apply(lambda x: 1 if 'zoo' in x else 0)

In [7]:
df.to_csv('df_raw.csv', index=False)

In [8]:
df = pd.read_csv('df_raw.csv')

In [9]:
len(df)

337

## Отзывы

In [10]:
fields = ['review']

In [11]:
df_reviews = pd.DataFrame(columns=['place_id', 'review_text', 'rating'])

In [12]:
for place_id in df['place_id']:
    reviews = gmaps.place(place_id=place_id, fields=fields, language='ru')['result']
    
    try:
        for review in reviews['reviews']:
            if review['text']:
                text = review['text']
            else:
                continue
            rating = review['rating']

            row = {'place_id': place_id,
                   'review_text': text,
                   'rating': rating}

            df_reviews = df_reviews.append(row, ignore_index=True)
    except KeyError:
        continue

In [13]:
df_reviews.to_csv('df_reviews.csv', index=False)

In [14]:
df_reviews = pd.read_csv('df_reviews.csv')

In [15]:
m = Mystem()

def preprocess(text):
    text = text.translate(str.maketrans('', '', string.punctuation))
    text = text.lower()
    lemmas = m.lemmatize(text)
    text = (''.join(lemmas)).strip()
    return text

In [16]:
df_reviews['review_text'] = df_reviews['review_text'].apply(preprocess)

In [17]:
def add_tag(word, name, df):
    df_t = df.merge(df_reviews['place_id'][df_reviews['review_text'].str.contains(word)], on='place_id')
    df_t[name] = 1
    df = df.merge(df_t[['place_id', name]], on='place_id', how='left')
    df[name] = df[name].fillna(0)
    df[name] = df[name].astype(int)
    
    return df

In [18]:
df = add_tag('семья', 'family', df)
df = add_tag('девушка', 'romantic1', df)
df = add_tag('парень', 'romantic2', df)
df['romantic'] = df['romantic1'] + df['romantic2']
df['romantic'] = df['romantic'].apply(lambda x: 1 if x >= 1 else 0)
df = df.drop(columns=['romantic1', 'romantic2'])
df = add_tag('дешево', 'cheap', df)
df = add_tag('дети', 'kids', df)
df = add_tag('дорого', 'expensive', df)
df = add_tag('друг', 'friends', df)

df = df.drop_duplicates(subset=['place_id'])

In [19]:
df.to_csv('df_final.csv', index=False)