# In [11]:  (notebook cell marker)
import os

import gspread_pandas
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from gspread_pandas import Spread
from matplotlib import font_manager as fm

# plt.ioff() #disable interactive mode so as not to take up memory

# Connect to the Google API and build a Spread object for the target spreadsheet.
# conf_dir is a local directory; creds.json is the Google secret file stored there,
# which grants access to the service account that was previously allowed on the
# spreadsheet. The `spread` argument is the spreadsheet URL.
conf = gspread_pandas.conf.get_config(
    conf_dir='C:\\Users\\oanov\\Тестовое',
    file_name='creds.json',
)
cred = gspread_pandas.conf.get_creds(config=conf)
spread = Spread(
    spread="https://docs.google.com/spreadsheets/d/1TuaKIhiv14f3W1OninFqu_-NB5uwSaJeE8j1wdraEws/edit#gid=0",
    config=conf,
    creds=cred,
)

# Sheet -> DataFrame.
# NOTE(review): the positional 0 is presumably gspread_pandas' `index` argument
# (0 = no index column) rather than a sheet number - confirm against the
# installed gspread_pandas version.
df = spread.sheet_to_df(0)

# Explicit column dtypes. `types` is paired with df.columns by position, so it
# relies on the column order of the sheet; zip stops at the shorter sequence.
types = ["string", int, "string", "string", int, np.float64, np.float64, "string"]
retype = {column: dtype for column, dtype in zip(df.columns, types)}
df = df.astype(retype)

#Function for splitting keywords, spaces after line break are removed!
def str_split(in_string: str, n: int) -> str:
    r"""Wrap a string by turning selected spaces into line breaks.

    The input is first stripped of surrounding whitespace. Then, counting
    from the start of the current line, the first space character at
    offset ``n - 1`` or later is replaced with ``"\n"`` and the search
    restarts right after it. Words are never cut, so every line except the
    last one is at least ``n - 1`` characters long and may be longer.
    Only the plain space ``" "`` is treated as a break point.

    Parameters
    ----------
    in_string : str
        Text to wrap.
    n : int
        Minimum line width used to locate the next break. Values below 1
        cannot define a width; the stripped text is returned unchanged
        (a negative value could otherwise make the search revisit the same
        space forever).

    Returns
    -------
    str
        The stripped text with the chosen spaces replaced by ``"\n"``.

    Examples
    --------
    >>> str_split(" the quick brown fox jumps over the lazy dog ", 10)
    'the quick\nbrown fox\njumps over\nthe lazy dog'
    """
    text = in_string.strip()
    if n < 1:
        return text

    lines = []
    line_start = 0
    while True:
        # Look for the first space at or beyond the minimum line width.
        break_at = text.find(" ", line_start + n - 1)
        if break_at == -1:
            break
        lines.append(text[line_start:break_at])
        # Skip the space itself: it is replaced by the line break.
        line_start = break_at + 1
    lines.append(text[line_start:])
    return "\n".join(lines)
        
# Wrap every keyword onto several lines (minimum line width 15) so that the
# annotations drawn on the chart stay compact. Only the 'keyword' column is
# needed, so the function is applied to that column directly instead of
# iterating over whole rows.
df['keyword'] = df['keyword'].apply(lambda keyword: str_split(keyword, 15))

# Font styles for the footer caption and for the legend entries
font_text = fm.FontProperties(family='sans-serif', style='italic', weight='ultralight', size=15)
font_legend = fm.FontProperties(family='sans-serif', weight='light', size=15)

# Make sure the export directory exists before the first savefig call
os.makedirs('img', exist_ok=True)

areas = list(df['area'].unique())
fig = plt.figure(figsize=(25, 15), dpi=100)
# One Axes is created explicitly and reused for every area (it is cleared at
# the end of each iteration); the footer text needs it for ax.transAxes.
ax = fig.add_subplot(111)

# One chart per area; area_number is the 1-based position used in the caption
# and in the file name.
for area_number, area_name in enumerate(areas, start=1):
    df_ar = df[df.area == area_name]
    # cluster_name -> color mapping for the clusters present in this area
    klcol = dict(df_ar[['cluster_name', 'color']].drop_duplicates().values)
    max_count = df_ar['count'].max()  # maximum for this particular area
    for cl_name in klcol:
        df_cln = df_ar[df_ar['cluster_name'] == cl_name]
        x = df_cln.x
        y = df_cln.y
        c = klcol[cl_name]
        s = df_cln['count']
        ax.scatter(
            x,
            y,
            c=c,
            s=s,  # the size of the circle depends on the value of 'count'
            edgecolors="black",
            linewidth=1,
            alpha=0.7,
            label=cl_name,
        )
        # Label every point with its keyword, centred on the point
        for kw_text, kw_x, kw_y, kw_count in zip(
            df_cln['keyword'], df_cln['x'], df_cln['y'], df_cln['count']
        ):
            ax.annotate(
                kw_text,
                (kw_x, kw_y),
                ha='center',
                va='center',
                # the annotation size scales with 'count' relative to the
                # area maximum, but never drops below half of the base size
                fontsize=26 * max(kw_count / max_count, 0.5),
                family='sans-serif',
                weight='medium',
            )
    ax.axis('off')  # no axis

    # Footer caption, positioned in axes coordinates
    ax.text(
        0.8,
        0.02,
        f"№{area_number} Диаграмма рассеяния для области {area_name}",
        ha="left",
        va="top",
        transform=ax.transAxes,
        fontproperties=font_text,
        weight='medium',
    )
    # The legend location is not fixed, so as not to block points; the legend
    # markers are scaled down proportionally.
    ax.legend(
        markerscale=0.5,
        title="Кластеры",  # legend title
        title_fontsize=20,
        prop=font_legend,
        framealpha=0.6,
    )

    # Path separators inside the area name would be read as directories,
    # so they are replaced (ar\vr -> ar_vr) before building the file name.
    path_area_name = area_name.replace("\\", '_').replace("/", '_')
    fig.savefig(os.path.join('img', f'Graph_{area_number}_{path_area_name}.png'))
    ax.cla()  # clear the Axes so the next area starts from an empty chart
plt.close('all')