In [1]:
from google.colab import drive
drive.mount('/content/gdrive/')

Mounted at /content/gdrive/


In [3]:
!pip install geopandas shapely -q

In [10]:
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
import matplotlib.patheffects as path_effects
import geopandas as gpd
from shapely.geometry import Point, Polygon
from tqdm import tqdm, trange
import json

In [2]:
!unzip '/content/gdrive/MyDrive/data/train_dataset_train_data_Mediawise.zip'

Archive:  /content/gdrive/MyDrive/data/train_dataset_train_data_Mediawise.zip
   creating: train_data_Mediawise/
  inflating: train_data_Mediawise/readme.md  
  inflating: __MACOSX/train_data_Mediawise/._readme.md  
  inflating: train_data_Mediawise/baseline.ipynb  
  inflating: __MACOSX/train_data_Mediawise/._baseline.ipynb  
  inflating: train_data_Mediawise/train_data.json  


## Предобработка данных и создание нового датасета с обновленными полигонами

In [45]:
df=pd.read_json('/content/train_data_Mediawise/train_data.json')
df=pd.concat([df,pd.json_normalize(df['targetAudience'])], axis=1)
df=df.drop(['targetAudience','id'], axis=1)

In [46]:
def rename_name(row):
    gender = ''
    age_from = row['ageFrom']
    age_to = row['ageTo']
    income = row['income'].upper()

    if row['gender'] == 'all':
        gender = 'All'
    else:
        gender = row['gender'][0].upper()

    if gender == 'All' and age_from == 18 and age_to == 100 and income == 'ABC':
        return f'All 18+'
    elif age_to == 100:
        return f'{gender} {age_from}+ {income}'
    else:
        return f'{gender} {age_from}-{age_to} {income}'

df['name'] = df.apply(rename_name, axis=1)

In [47]:
def get_direction(angle):
    directions = ['N', 'NE', 'E', 'SE', 'S', 'SW', 'W', 'NW', 'N']
    direction_index = round(angle / 45) % 8
    return directions[direction_index]

In [48]:
latitudes = []
longitudes = []
direction = []
names = []
values = []

for idx, row in df.iterrows():
    for point in row['points']:
        latitudes.append(point['lat'])
        longitudes.append(point['lon'])
        direction.append(get_direction(point['azimuth']))
        names.append(row['name'])
        values.append(row['value']/len(row['points']))


new_data = {
    'latitude': latitudes,
    'longitude': longitudes,
    'direction': direction,
    'name': names,
    'value': values
}

points_data = pd.DataFrame(new_data)
points_data.drop_duplicates(inplace=True)

In [49]:
tqdm.pandas()


districts = gpd.read_file('/content/gdrive/MyDrive/data/moscow.geojson')

def find_polygon_name(point, gdf):
    for idx, row in gdf.iterrows():
        if point.within(row['geometry']):
            return row['district']
    return 'Other'

points_data['polygon_name'] = points_data.progress_apply(
    lambda row: find_polygon_name(Point(row['longitude'], row['latitude']), districts), axis=1)

100%|██████████| 128425/128425 [18:56<00:00, 113.00it/s]


In [54]:
points_data.to_csv('points_data.csv', index=False)

In [105]:
# Функция для получения локаций по целевой аудитории и количеству сторон
def get_points(file_path):
    with open(file_path, 'r') as file:
        json_data = json.load(file)

    df = pd.DataFrame([{**json_data['targetAudience'], 'sides': json_data['sides']}])
    df['name'] = df.apply(rename_name, axis=1)
    df.drop(['gender', 'ageFrom', 'ageTo', 'income'], axis=1, inplace=True)
    df = df.reset_index(drop=True)

    points = pd.read_csv('/content/points_data.csv')
    out = points[points['name'].isin(df['name'])].nlargest(df['sides'][0], 'value')
    return out.to_json(orient='records', force_ascii=False)

In [108]:
get_points('/content/mock_audience.json')

'[{"latitude":55.734433213,"longitude":37.5432100312,"direction":"NW","name":"All 25-45 BC","value":1.9233333333,"polygon_name":"район Дорогомилово"},{"latitude":55.7927771897,"longitude":37.6292563608,"direction":"E","name":"All 25-45 BC","value":1.9233333333,"polygon_name":"район Марьина роща"},{"latitude":55.7034408068,"longitude":37.6572551042,"direction":"W","name":"All 25-45 BC","value":1.9233333333,"polygon_name":"район Даниловский"},{"latitude":55.7719047705,"longitude":37.5437883998,"direction":"SW","name":"All 25-45 BC","value":1.9233333333,"polygon_name":"район Пресненский"}]'