In [34]:
import json
import time
import datetime

In [64]:
import vk
import numpy as np
import pandas as pd
import user_info as ui
import matplotlib.pyplot as plt

In [65]:
# Open an authorised VK session with the application id, login and password
# stored in the local ``user_info`` module (imported as ``ui``).
session = vk.AuthSession(
    app_id=ui.APP_ID,
    user_login=ui.USER_LOGIN,
    user_password=ui.USER_PASSWORD,
)
# Module-level API handle; the request helpers in this file call it directly.
api = vk.API(session)

In [66]:
def get_group_id(groups_list):
    """
    Look up the id of every group by its title.

    Each entry's ``title`` is sent to ``api.groups.search``. The first element
    of the response is skipped (presumably a result counter in this API
    version - TODO confirm). Every remaining hit whose ``name`` equals the
    title exactly stores its ``gid`` under ``entry['id']``, so when several
    hits match the last one wins. Entries without an exact match are left
    without an ``id`` key. The entries are modified in place.
    :param groups_list: <list> of dicts that each have a 'title' key
    :return: <list> the same list object that was passed in
    """
    for entry in groups_list:
        wanted_title = entry['title']
        search_hits = api.groups.search(q=wanted_title)[1:]
        for hit in search_hits:
            if hit['name'] != wanted_title:
                continue
            entry['id'] = hit['gid']
    return groups_list

In [67]:
def get_members(group_id):
    """
    Collect the members of a group together with their sex and birth date.

    Members are requested page by page through ``api.groups.getMembers``,
    moving the offset forward by 1000 after every request, until the offset
    reaches the ``count`` reported in the response.
    :param group_id: <int>
    :return: <list> concatenated ``users`` entries of all responses
    """
    collected = []
    position = 0
    has_more = True
    while has_more:
        page = api.groups.getMembers(group_id=group_id, offset=position, fields=['sex', 'bdate'])
        collected += page['users']
        position += 1000
        # Pause because the API limits the number of requests per second;
        # the pause only starts once the offset has reached 3000.
        if position >= 3000:
            time.sleep(0.34)
        has_more = position < page['count']
    return collected

In [68]:
def gender_count(members_list):
    """
    Count the group members by sex.

    A ``sex`` value of 1 is counted as 'Female' and 2 as 'Male'; any other
    value is ignored. Every member must have a 'sex' key.
    :param members_list: <list> of member dicts
    :return: <dict> with the keys 'Female' and 'Male'
    """
    sex_codes = [person['sex'] for person in members_list]
    return {'Female': sex_codes.count(1), 'Male': sex_codes.count(2)}

In [69]:
def get_year(date):
    """
    Extract the year from a birth date string.

    Only dates containing exactly two dots carry a year; for those the last
    four characters are returned. Any other string yields None.
    :param date: <str>
    :return: <str> or <None>
    """
    if date.count('.') != 2:
        return None
    return date[-4:]

In [70]:
def age_count(members_list):
    """
    Count the group members by age.

    The age is the difference between the current calendar year and the
    birth year returned by ``get_year``. Members without a 'bdate' key, or
    whose birth date has no year, are skipped.
    :param members_list: <list> of member dicts
    :return: <dict> mapping age to the number of members of that age
    """
    this_year = datetime.datetime.now().year
    histogram = {}
    for person in members_list:
        if 'bdate' not in person:
            continue
        birth_year = get_year(person['bdate'])
        if birth_year is None:
            continue
        years_old = this_year - int(birth_year)
        histogram[years_old] = histogram.get(years_old, 0) + 1
    return histogram

In [None]:
# Parse the list of groups from disk; the file is closed as soon as it has
# been read, before any network requests are made.
with open('top100.json', encoding='utf-8') as file:
    groups = json.load(file)

# NOTE(review): every entry is assumed to already have an 'id' key. If the
# file only stores titles, resolve them first with get_group_id(groups) -
# confirm against the actual contents of top100.json.
for group in groups:
    group_members = get_members(group['id'])
    members_genders = gender_count(group_members)
    members_ages = age_count(group_members)
    genders = pd.DataFrame(list(members_genders.items()), columns=['Gender', 'Count'])
    ages = pd.DataFrame(list(members_ages.items()), columns=['Age', 'Count'])
    # Name the axes explicitly: with no arguments the row index is used for
    # x and every numeric column (including 'Age') is drawn as its own bar
    # series.
    genders.plot.bar(x='Gender', y='Count')
    # The age dictionary is filled in order of first occurrence, so sort it
    # to get a readable distribution.
    ages.sort_values('Age').plot.bar(x='Age', y='Count')