In [None]:
%pip install pandas matplotlib numpy jinja2 wordcloud

In [None]:
import sqlite3
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import random
from wordcloud import WordCloud, STOPWORDS

# Global matplotlib look shared by every figure in the notebook:
# dark theme, preferred sans-serif face and a high rendering resolution.
plt.style.use('dark_background')
# NOTE(review): assumes "Helvetica Neue" is installed on the machine; confirm
# that the fallback font still renders the Cyrillic labels used below.
plt.rcParams['font.sans-serif'] = "Helvetica Neue"
plt.rcParams["figure.dpi"] = 400

In [None]:
# Open the SQLite database file; the path is relative to the kernel's working directory.
# NOTE(review): no conn.close() is visible in this notebook — confirm whether the
# connection should be closed once the data has been read.
conn = sqlite3.connect('../DATA/folie.db')

In [None]:
def fix_mark(mark):
    """Normalise a raw mark to an integer in 1..10, or None when there is no mark.

    Args:
        mark: Raw value; anything accepted by ``int()``, or None/NaN.

    Returns:
        None for None, NaN or 0. Otherwise the absolute value if it is at
        most 10; for larger numbers, 10 when the digits start with "10"
        (e.g. 100, 105) and the first digit in every other case (e.g. 85 -> 8).

    Raises:
        ValueError/TypeError: propagated from ``int()`` for non-numeric input.
    """
    # NaN is the only value that is not equal to itself; int(NaN) would raise.
    if mark is None or mark != mark:
        return None

    mark = abs(int(mark))
    if mark == 0:
        return None
    if mark <= 10:
        return mark

    digits = str(mark)
    # Compare the first TWO characters: a one-character slice can never equal '10'.
    if digits[:2] == '10':
        return 10
    return int(digits[0])

def fix_year(year):
    """Return ``year`` unchanged, except that '2023/2024' is replaced by None."""
    return None if year == '2023/2024' else year

def fix_module(module):
    """Return the leading digit of a module label, or None when there is none.

    Args:
        module: Raw module value; normally a string, possibly None/NaN/empty.

    Returns:
        The first character of the value when it is a digit, otherwise None.
        Missing (None, NaN) and empty values yield None instead of raising.
    """
    # NaN is the only value that is not equal to itself.
    if module is None or module != module:
        return None

    text = str(module)
    if not text or not text[0].isdigit():
        return None
    return text[0]

# Load every row of the projects table and normalise marks, year, module and date.
query = 'SELECT * FROM projects'
df = pd.read_sql(query, conn)

mark_columns = ['mark_mid', 'mark_project', 'mark_final']
# fix_mark returns None for absent/zero marks; they are stored as 0 so the
# columns can be integer-typed. From here on 0 means "no mark".
df[mark_columns] = df[mark_columns].map(fix_mark).fillna(0).astype(int)

# Keep only projects that have both a project mark and a final mark; a missing
# preview mark (mark_mid == 0) is tolerated. The copy avoids chained-assignment
# warnings when columns are added below.
for c in ['mark_project', 'mark_final']:
    df = df[df[c] != 0]
df = df.copy()

# Mask the 0 placeholder before aggregating, otherwise a missing preview mark
# would be averaged in as a real mark of zero.
present_marks = df[mark_columns].replace(0, np.nan)
df['mark_mean'] = present_marks.mean(axis=1)
df['mark_median'] = present_marks.median(axis=1)

df['academic_year'] = df['academic_year'].map(fix_year)
df['module'] = df['module'].map(fix_module)

df['published'] = pd.to_datetime(df['published'], dayfirst=True)

In [None]:
TITLE = 'Общий share оценок за все время'

# Number of projects per final mark, ordered by mark value.
mark_counts = df['mark_final'].value_counts().sort_index()

fig, ax = plt.subplots()
fig.set_facecolor("#282828")
ax.set_facecolor("#282828")

mark_counts.plot.bar(ax=ax, color='skyblue', rot=0)

ax.set_title(TITLE)
ax.set_xlabel('Оценки')
ax.set_ylabel('Кол-во проектов')

plt.show()

In [None]:
TITLE = 'Количество проектов каждый год'

# Projects per academic year, index sorted in descending order.
years = df['academic_year'].value_counts().sort_index(ascending=False)

fig, ax = plt.subplots(figsize=(8, 4.75))
fig.set_facecolor("#282828")
ax.set_facecolor("#282828")

years.plot.barh(ax=ax, color='skyblue', width=0.6)

ax.set_title(TITLE)
ax.set_ylabel('Учебные года')
ax.set_xlabel('Кол-во проектов')

plt.show()

In [None]:
TITLE = 'Количество уникальных авторов каждый год'

# Distinct authors per academic year, index sorted in descending order.
unique_authors = (
    df.groupby('academic_year')['author']
    .nunique()
    .reset_index()
    .sort_values('academic_year', ascending=False)
)
# Overall number of distinct author values across all years.
print(len(df['author'].unique()))

fig, ax = plt.subplots(figsize=(8, 4.75))
fig.set_facecolor("#282828")
ax.set_facecolor("#282828")

ax.barh(unique_authors['academic_year'], unique_authors['author'], color='skyblue', height=0.6)

ax.set_title(TITLE)
ax.set_ylabel('Учебные года')
ax.set_xlabel('Кол-во авторов')

plt.show()

In [None]:
TITLE = 'Разница оценок между предпросмотром и просмотром'

df['mid_project_difference'] = df['mark_mid'] - df['mark_project']

# mark_mid == 0 is the placeholder for a missing preview mark (marks are
# fillna(0)'d when loaded). Counting those rows would report "0 - project mark"
# as a genuine drop, so only projects that actually have a preview mark count.
has_preview = df['mark_mid'] != 0
top_differences = (
    df.loc[has_preview, 'mid_project_difference']
    .value_counts()
    .nlargest(10)
    .sort_index()
)

fig, ax = plt.subplots(figsize=(8, 4.75))
fig.set_facecolor("#282828")
ax.set_facecolor("#282828")

top_differences.plot(kind='bar', color='skyblue', width=0.6, rot=0, ax=ax)

ax.set_title(TITLE)
ax.set_xlabel('Разница')
ax.set_ylabel('Кол-во проектов')

plt.show()

In [None]:
TITLE = 'Разница оценок между просмотром и зачеткой'

df['project_final_difference'] = df['mark_project'] - df['mark_final']
# The five most frequent differences, shown in numeric order.
difference_counts = df['project_final_difference'].value_counts()
top_differences = difference_counts.nlargest(5).sort_index()

fig, ax = plt.subplots(figsize=(8, 4.75))
fig.set_facecolor("#282828")
ax.set_facecolor("#282828")

top_differences.plot.bar(ax=ax, color='skyblue', width=0.4, rot=0)

ax.set_title(TITLE)
ax.set_xlabel('Разница')
ax.set_ylabel('Кол-во проектов')

plt.show()

In [None]:
TITLE = 'Тренд оценок за предпросмотры'

# mark_mid == 0 is the placeholder for "no preview mark" (marks are fillna(0)'d
# when loaded); averaging it in would drag every year's mean down.
df_mid = df[df['mark_mid'] != 0]
avg_marks = df_mid.groupby('academic_year')['mark_mid'].mean()

fig, ax1 = plt.subplots(figsize=(10, 4.75))
fig.set_facecolor("#282828")
ax1.set_facecolor("#282828")
ax1.set_ylabel('Средняя оценка', color='skyblue')

ax2 = ax1.twinx()
ax2.set_ylabel('Кол-во десяток', color='orange')

plot = avg_marks.plot(kind='line', color='skyblue', marker='o', ax=ax1)

df_10 = df[df['mark_mid'] == 10]
# Both series are drawn by position, so the count series must cover exactly the
# same years as avg_marks; a year without any 10 becomes an explicit 0 instead
# of shifting every later point to the left.
df_10_count = df_10.groupby('academic_year').size().reindex(avg_marks.index, fill_value=0)

df_10_count.plot(kind='line', color='orange', marker='o', ax=ax2)

x_labels = avg_marks.index.tolist()
plot.set_xticks(range(len(x_labels)))
plot.set_xticklabels(x_labels)

ax1.set_xlabel('Учебный год')

plt.title(TITLE)

plt.show()

In [None]:
TITLE = 'Тренд оценок за проекты'

avg_marks = df.groupby('academic_year')['mark_project'].mean()

fig, ax1 = plt.subplots(figsize=(10, 4.75))
fig.set_facecolor("#282828")
ax1.set_facecolor("#282828")
ax1.set_ylabel('Средняя оценка', color='skyblue')

ax2 = ax1.twinx()
ax2.set_ylabel('Процент десяток', color='orange')

plot = avg_marks.plot(kind='line', color='skyblue', marker='o', ax=ax1)

df_total_count = df.groupby('academic_year').size()
df_10 = df[df['mark_project'] == 10]
# A year without any 10 is absent from the grouped counts; fill it with 0 so the
# share is 0 % there rather than NaN (which would leave a gap in the line).
df_10_count = df_10.groupby('academic_year').size().reindex(df_total_count.index, fill_value=0)
df_10_percentage = (df_10_count / df_total_count) * 100

df_10_percentage.plot(kind='line', color='orange', marker='o', ax=ax2)

x_labels = avg_marks.index.tolist()
plot.set_xticks(range(len(x_labels)))
plot.set_xticklabels(x_labels)

ax1.set_xlabel('Учебный год')

plt.title(TITLE)

plt.show()

In [None]:
TITLE = 'Тренд оценок в зачетках'

avg_marks = df.groupby('academic_year')['mark_final'].mean()

fig, ax1 = plt.subplots(figsize=(10, 4.75))
fig.set_facecolor("#282828")
ax1.set_facecolor("#282828")
ax1.set_ylabel('Средняя оценка', color='skyblue')

ax2 = ax1.twinx()
ax2.set_ylabel('Кол-во десяток', color='orange')

plot = avg_marks.plot(kind='line', color='skyblue', marker='o', ax=ax1)

df_10 = df[df['mark_final'] == 10]
# Both series are drawn by position, so the count series must cover exactly the
# same years as avg_marks; a year without any 10 becomes an explicit 0 instead
# of shifting every later point to the left.
df_10_count = df_10.groupby('academic_year').size().reindex(avg_marks.index, fill_value=0)

df_10_count.plot(kind='line', color='orange', marker='o', ax=ax2)

x_labels = avg_marks.index.tolist()
plot.set_xticks(range(len(x_labels)))
plot.set_xticklabels(x_labels)

ax1.set_xlabel('Учебный год')

plt.title(TITLE)

plt.show()

In [None]:
TITLE = 'Тренд средних оценок'

# Per-year average of each project's mean mark.
avg_marks = df.groupby('academic_year')['mark_mean'].mean()

fig, ax1 = plt.subplots(figsize=(10, 4.75))
fig.set_facecolor("#282828")
ax1.set_facecolor("#282828")
ax1.set_ylabel('Средняя оценка', color='skyblue')

plot = avg_marks.plot(kind='line', color='skyblue', marker='o', ax=ax1)

# The line is drawn by position, so label each position with its academic year.
x_labels = avg_marks.index.tolist()
plot.set_xticks(range(len(x_labels)))
plot.set_xticklabels(x_labels)

ax1.set_xlabel('Учебный год')

plt.title(TITLE)

plt.show()

In [None]:
TITLE = 'Тренд медианных оценок'

# Per-year average of each project's median mark.
avg_marks = df.groupby('academic_year')['mark_median'].mean()

fig, ax1 = plt.subplots(figsize=(10, 4.75))
fig.set_facecolor("#282828")
ax1.set_facecolor("#282828")
ax1.set_ylabel('Средняя оценка', color='skyblue')

plot = avg_marks.plot(kind='line', color='skyblue', marker='o', ax=ax1)

# The line is drawn by position, so label each position with its academic year.
x_labels = avg_marks.index.tolist()
plot.set_xticks(range(len(x_labels)))
plot.set_xticklabels(x_labels)

ax1.set_xlabel('Учебный год')

plt.title(TITLE)

plt.show()

In [None]:
TITLE = 'Разница между первой и последней оценкой одного автора'

# NOTE(review): this cell rebinds the shared `df` to a narrower frame (no
# 'multiple' authors, only authors with at least two projects), so every cell
# executed afterwards sees only those rows. Behaviour is kept as-is because
# later cells may rely on it — confirm whether a separate frame was intended.
df = df[df['author'] != 'multiple']
df = df.sort_values(['author', 'id'])

# First/last project id and final mark per author, in id order.
by_author = df.groupby('author')
df = df.assign(
    first_id=by_author['id'].transform('first'),
    last_id=by_author['id'].transform('last'),
    first_mark=by_author['mark_final'].transform('first'),
    last_mark=by_author['mark_final'].transform('last'),
)

# The difference is only meaningful when the first and last project differ;
# everything else (single-project authors, rows without an author) becomes NaN
# and is dropped. Vectorised instead of a row-wise apply with a sentinel value.
has_several_projects = df['last_id'] != df['first_id']
df = df.assign(mark_difference=(df['last_mark'] - df['first_mark']).where(has_several_projects))
df = df[df['mark_difference'].notna()].astype({'mark_difference': int})

# One row per author is enough: the difference is identical on all their rows.
df_unique = df.drop_duplicates(subset='author')

most_common_diff = df_unique['mark_difference'].value_counts().nlargest(10).sort_index()

fig, ax = plt.subplots(figsize=(8, 4.75))
fig.set_facecolor("#282828")
ax.set_facecolor("#282828")

most_common_diff.plot(kind='bar', color='skyblue', width=0.6, rot=0, alpha=0.9, ax=ax)
ax.set_xlabel('Разница')
ax.set_ylabel('Кол-во человек')

ax.set_title(TITLE)
plt.show()

In [None]:
TITLE = 'Среднее количество проектов на человека в год'

# Projects per (year, author) pair, then the per-year mean of those counts.
df_grouped = (
    df.groupby(['academic_year', 'author'])['id']
    .count()
    .reset_index(name='project_count')
)
average_projects = df_grouped.groupby('academic_year')['project_count'].mean()

fig, ax = plt.subplots(figsize=(10, 4.75))
fig.set_facecolor("#282828")
ax.set_facecolor("#282828")

average_projects.plot.bar(ax=ax, color='skyblue', width=0.6, rot=0)

ax.set_title(TITLE)
ax.set_xlabel('Учебный год')
ax.set_ylabel('Кол-во проектов')

plt.show()


In [None]:
TITLE = 'Самые популярные фамилии студентов'

# Surname = last whitespace-separated token of the author string. The .str
# accessor yields NaN for missing or blank authors, so they are left out of the
# counts instead of being tallied as an empty surname (and a blank string no
# longer raises IndexError).
df['last_name'] = df['author'].str.split().str[-1]
last_name_counts = df['last_name'].value_counts().head(10)

fig, ax = plt.subplots(figsize=(10, 4.75))
fig.set_facecolor("#282828")
ax.set_facecolor("#282828")

last_name_counts.plot(kind='bar', color='skyblue', width=0.6, rot=0, ax=ax)

ax.set_title(TITLE)
ax.set_xlabel('Фамилии')
ax.set_ylabel('Кол-во вхождений')

plt.show()


In [None]:
TITLE = 'Самые популярные фамилии студентов, не заканчивающиеся на "a"'


def _surname_unless_excluded(name):
    """Return the last token of ``name``, or None for missing, 'а'-ending or 'multiple' authors."""
    if pd.isna(name) or name.endswith('а') or name == 'multiple':
        return None
    return name.split()[-1]


df['last_name'] = df['author'].apply(_surname_unless_excluded)
last_name_counts = df['last_name'].value_counts().head(10)

fig, ax = plt.subplots(figsize=(10, 4.75))
fig.set_facecolor("#282828")
ax.set_facecolor("#282828")

last_name_counts.plot.bar(ax=ax, color='skyblue', width=0.6, rot=0)

ax.set_title(TITLE)
ax.set_xlabel('Фамилии')
ax.set_ylabel('Кол-во вхождений')

plt.show()


In [None]:
TITLE = 'Самые высокооцениваемые фамилии студентов'

# Surname = last whitespace-separated token of the author string. Missing or
# blank authors become NaN and are therefore excluded from the grouping instead
# of forming an empty-surname group.
df['last_name'] = df['author'].str.split().str[-1]
# Only surnames with at least 50 rows are ranked.
df_filtered = df.groupby('last_name').filter(lambda x: len(x) >= 50)
avg_marks = df_filtered.groupby('last_name')['mark_project'].mean().nlargest(20)

fig, ax = plt.subplots(figsize=(10, 4.75))
fig.set_facecolor("#282828")
ax.set_facecolor("#282828")

avg_marks.plot(kind='bar', color='skyblue', width=0.6, rot=45, ax=ax)

ax.set_title(TITLE)
ax.set_xlabel('Фамилии')
ax.set_ylabel('Средняя оценка')
ax.set_ylim((8.35, 9.4))

plt.show()


In [None]:
TITLE = 'Количество проектов по кураторам'

# The 15 curators with the most projects.
curator_totals = df['curator'].value_counts()
lecture_counts = curator_totals.head(15)

fig, ax = plt.subplots(figsize=(10, 4.75))
fig.set_facecolor("#282828")
ax.set_facecolor("#282828")

lecture_counts.plot.bar(ax=ax, color='skyblue', width=0.6, rot=90)

ax.set_title(TITLE)
ax.set_xlabel('Фамилии')
ax.set_ylabel('Кол-во проектов')

plt.show()


In [None]:
TITLE = 'Самые результативные кураторы'

# Rank only curators with at least 75 rows, by mean project mark.
df_filtered = df.groupby('curator').filter(lambda rows: len(rows) >= 75)
curator_means = df_filtered.groupby('curator')['mark_project'].mean()
avg_marks = curator_means.nlargest(20)

fig, ax = plt.subplots(figsize=(10, 4.75))
fig.set_facecolor("#282828")
ax.set_facecolor("#282828")

avg_marks.plot.bar(ax=ax, color='skyblue', width=0.6, rot=90)

ax.set_title(TITLE)
ax.set_xlabel('Фамилии')
ax.set_ylabel('Средняя оценка')
ax.set_ylim((8, 9.1))

plt.show()


In [None]:
TITLE = 'Топ-25 самых успешных групп евер'

# Rank only groups with at least 100 rows, by mean project mark.
df_filtered = df.groupby('group_name').filter(lambda rows: len(rows) >= 100)

group_means = df_filtered.groupby('group_name')['mark_project'].mean()
avg_marks = group_means.nlargest(25)

fig, ax = plt.subplots(figsize=(12, 4.75))
fig.set_facecolor("#282828")
ax.set_facecolor("#282828")

plot = avg_marks.plot.bar(ax=ax, color='skyblue', width=0.6, rot=45)

ax.set_title(TITLE)
ax.set_xlabel('Группа')
ax.set_ylabel('Средняя оценка')
ax.set_ylim((8.35, 8.9))

plt.show()

In [None]:
TITLE = 'Топ-25 самых успешных групп за последние 3 года'

# Rank only groups with at least 100 rows overall.
df_filtered = df.groupby('group_name').filter(lambda x: len(x) >= 100)
# fix_year() maps '2023/2024' to None when the data is loaded, so that label can
# never match here and the old list effectively covered two years. Use the same
# three-year window as the "last 3 years" chart of views further down.
recent_years = ['2020/2021', '2021/2022', '2022/2023']
df_filtered = df_filtered[df_filtered['academic_year'].isin(recent_years)]

avg_marks = df_filtered.groupby('group_name')['mark_project'].mean().nlargest(25)

fig, ax = plt.subplots(figsize=(12, 4.75))
fig.set_facecolor("#282828")
ax.set_facecolor("#282828")

plot = avg_marks.plot(kind='bar', color='skyblue', width=0.6, rot=45, ax=ax)

ax.set_title(TITLE)
ax.set_xlabel('Группа')
ax.set_ylabel('Средняя оценка')
# Zoom onto the plotted range; limits are derived from the data because the
# previous fixed limits were tuned to the two-year selection and could clip bars.
if not avg_marks.empty:
    ax.set_ylim(avg_marks.min() - 0.1, avg_marks.max() + 0.1)

plt.show()

In [None]:
TITLE = 'Топ-25 самых успешных групп сезона Б22ДЗ**'

# Groups with at least 100 rows whose name starts with 'Б22'.
df_filtered = df.groupby('group_name').filter(lambda rows: len(rows) >= 100)
is_b22 = df_filtered['group_name'].str.startswith('Б22')
df_filtered = df_filtered[is_b22]

group_means = df_filtered.groupby('group_name')['mark_project'].mean()
avg_marks = group_means.nlargest(25)

fig, ax = plt.subplots(figsize=(12, 4.75))
fig.set_facecolor("#282828")
ax.set_facecolor("#282828")

plot = avg_marks.plot.bar(ax=ax, color='skyblue', width=0.6, rot=45)

ax.set_title(TITLE)
ax.set_xlabel('Группа')
ax.set_ylabel('Средняя оценка')
ax.set_ylim((7.7, 8.6))

plt.show()

In [None]:
TITLE = 'Средний тренд по модулям за последние 2 года'

fig, ax1 = plt.subplots(figsize=(10, 4.75))
fig.set_facecolor("#282828")
ax1.set_facecolor("#282828")
ax1.set_ylabel('Средняя оценка', color='skyblue')

ax2 = ax1.twinx()
ax2.set_ylabel('Кол-во проектов', color='orange')

# Restrict to the two academic years named in the list below.
in_window = df['academic_year'].isin(['2021/2022', '2022/2023'])
df_filtered = df[in_window]

# Project count and mean final mark per module.
modules = df_filtered['module'].value_counts().sort_index()
avg_marks = df_filtered.groupby('module')['mark_final'].mean()

avg_marks.plot.line(ax=ax1, color='skyblue', marker='o')

# The line is drawn by position, so label each position with its module.
module_labels = list(avg_marks.index)
ax1.set_xticks(range(len(module_labels)))
ax1.set_xticklabels(module_labels)

modules.plot.line(ax=ax2, color='orange', marker='o')
ax1.set_xlabel('Модуль')

plt.title(TITLE)
plt.show()

In [None]:
TITLE = 'Самые продуктивные дисциплины'

# The 15 disciplines with the most projects.
discipline_totals = df['discipline'].value_counts()
program_counts = discipline_totals.head(15)

fig, ax = plt.subplots(figsize=(10, 4.75))
fig.set_facecolor("#282828")
ax.set_facecolor("#282828")

program_counts.plot.bar(ax=ax, color='skyblue', width=0.6, rot=90)

ax.set_title(TITLE)
ax.set_xlabel('Дисциплина')
ax.set_ylabel('Кол-во проектов')

plt.show()


In [None]:
TITLE = 'Самые щедрые просмотры за последние 3 года'

# Rows from the three listed academic years that have a non-empty view label.
in_window = df['academic_year'].isin(['2020/2021', '2021/2022', '2022/2023'])
df_filtered = df[in_window]
df_filtered = df_filtered[df_filtered['view'] != '']

# Top 20 views by mean project mark, drawn in ascending order.
view_means = df_filtered.groupby('view')['mark_project'].mean()
avg_marks = view_means.nlargest(20).sort_values()

fig, ax = plt.subplots(figsize=(10, 4.75))
fig.set_facecolor("#282828")
ax.set_facecolor("#282828")

avg_marks.plot.barh(ax=ax, color='skyblue', width=0.6, rot=0)

ax.set_title(TITLE)
ax.set_xlabel('Средняя оценка за проект')
ax.set_ylabel('Просмотр')
ax.set_xlim((7.5, 7.85))

plt.show()

In [None]:
TITLE = 'Среднее vs. медианное оценок за все время (mark_project)'

# Per-year mean and median of the project mark.
project_by_year = df.groupby('academic_year')['mark_project']
mean_marks = project_by_year.mean()
median_marks = project_by_year.median()

fig, ax1 = plt.subplots(figsize=(10, 4.75))
fig.set_facecolor("#282828")
ax1.set_facecolor("#282828")
ax1.set_ylabel('Среднее', color='skyblue')

# Second y-axis: the median gets its own scale.
ax2 = ax1.twinx()
ax2.set_ylabel('Медианное', color='orange')

mean_marks.plot.line(ax=ax1, color='skyblue', marker='o')
median_marks.plot.line(ax=ax2, color='orange', marker='o')

# The lines are drawn by position, so label each position with its academic year.
year_labels = list(mean_marks.index)
ax1.set_xticks(range(len(year_labels)))
ax1.set_xticklabels(year_labels)

ax1.set_xlabel('Учебный год')

plt.title(TITLE)

plt.show()

In [None]:
TITLE = 'Среднее vs. медианное оценок за все время (mark_median)'

# Per-year mean and median of each project's median mark.
median_by_year = df.groupby('academic_year')['mark_median']
mean_marks = median_by_year.mean()
median_marks = median_by_year.median()

fig, ax1 = plt.subplots(figsize=(10, 4.75))
fig.set_facecolor("#282828")
ax1.set_facecolor("#282828")
ax1.set_ylabel('Среднее', color='skyblue')

# Second y-axis: the median gets its own scale.
ax2 = ax1.twinx()
ax2.set_ylabel('Медианное', color='orange')

mean_marks.plot.line(ax=ax1, color='skyblue', marker='o')
median_marks.plot.line(ax=ax2, color='orange', marker='o')

# The lines are drawn by position, so label each position with its academic year.
year_labels = list(mean_marks.index)
ax1.set_xticks(range(len(year_labels)))
ax1.set_xticklabels(year_labels)

ax1.set_xlabel('Учебный год')

plt.title(TITLE)

plt.show()

In [None]:
TITLE = 'Среднее vs. медианное оценок за все время (mark_mean)'

# Per-year mean and median of each project's mean mark.
mean_by_year = df.groupby('academic_year')['mark_mean']
mean_marks = mean_by_year.mean()
median_marks = mean_by_year.median()

fig, ax1 = plt.subplots(figsize=(10, 4.75))
fig.set_facecolor("#282828")
ax1.set_facecolor("#282828")
ax1.set_ylabel('Среднее', color='skyblue')

# Second y-axis: the median gets its own scale.
ax2 = ax1.twinx()
ax2.set_ylabel('Медианное', color='orange')

mean_marks.plot.line(ax=ax1, color='skyblue', marker='o')
median_marks.plot.line(ax=ax2, color='orange', marker='o')

# The lines are drawn by position, so label each position with its academic year.
year_labels = list(mean_marks.index)
ax1.set_xticks(range(len(year_labels)))
ax1.set_xticklabels(year_labels)

ax1.set_xlabel('Учебный год')

plt.title(TITLE)

plt.show()

In [None]:
TITLE = 'Публикации проектов в день в течение года'

# Count publications per day-of-year and divide by the number of distinct
# calendar years present, giving an average count per day.
df["day_of_year"] = df["published"].dt.dayofyear
grouped_df = df.groupby("day_of_year").size().reset_index(name='counts')
number_of_years = df["published"].dt.year.nunique()
grouped_df['averages'] = grouped_df['counts'] / number_of_years

fig, ax = plt.subplots(figsize=(10, 4.75))
fig.set_facecolor("#282828")
ax.set_facecolor("#282828")

ax.plot(grouped_df["day_of_year"], grouped_df["averages"], color='skyblue')
ax.set_xlabel('Месяц')
ax.set_ylabel('Публикаций')
# Tick positions are the first day-of-year of each month in a non-leap year.
ax.set_xticks([1, 32, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335])
ax.set_xticklabels(['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'])

# Single title call: the earlier placeholder title was overwritten immediately.
ax.set_title(TITLE)
plt.show()

In [None]:
TITLE = 'Облако слов по названиям'

# wordcloud's built-in stopwords plus short Russian function words.
stopwords = set(STOPWORDS)
stopwords.update({
    'и', 'в', 'у', 'я', 'а', 'с', 'о', 'от', 'по', 'не', 'до', 'на', 'из',
    'это', 's', 'х',
})

# str.join raises TypeError on None/NaN, so skip rows without a title.
words = ' '.join(df['title'].dropna().astype(str))

color_array = ['#FBBA72','#DDAE7E','#93A3B1','#FEB95F','#7C898B','#EF6F6C']

def color_func(word, font_size, position, orientation, random_state=None,**kwargs):
    """Pick a random palette colour for each word (signature required by WordCloud)."""
    return random.choice(color_array)


wordcloud = WordCloud(width=1140, height=1600,
                contour_width=0,
                background_color='#282828',
                color_func=color_func,
                stopwords=stopwords,
                min_font_size=10).generate(words)

plt.figure(figsize = (20, 10), facecolor = None)
plt.imshow(wordcloud)
plt.axis("off")
plt.tight_layout(pad = 0)

plt.show()

In [None]:
TITLE = 'Облако слов по описаниям'

# wordcloud's built-in stopwords plus Russian function words and pronouns.
stopwords = set(STOPWORDS)
stopwords.update({
    'и', 'в', 'у', 'я', 'а', 'с', 'о', 'от', 'по', 'не', 'до', 'на', 'из',
    'это', 'его', 'их', 'но', 'он', 'ее', 'них', 'себя', 'него', 'как',
    'или', 'себе', 'только', 'также', 'так', 'этом', 's', 'х',
})

# str.join raises TypeError on None/NaN, so skip rows without a description.
words = ' '.join(df['description'].dropna().astype(str))

color_array = ['#FBBA72','#DDAE7E','#93A3B1','#FEB95F','#7C898B','#EF6F6C']

def color_func(word, font_size, position, orientation, random_state=None,**kwargs):
    """Pick a random palette colour for each word (signature required by WordCloud)."""
    return random.choice(color_array)


wordcloud = WordCloud(width=2000, height=1000,
                contour_width=0,
                background_color='#282828',
                color_func=color_func,
                stopwords=stopwords,
                min_font_size=10).generate(words)

plt.figure(figsize = (20, 10), facecolor = None)
plt.imshow(wordcloud)
plt.axis("off")
plt.tight_layout(pad = 0)

plt.show()

In [None]:
TITLE = 'Облако слов по тегам'

# wordcloud's built-in stopwords plus short Russian function words.
stopwords = set(STOPWORDS)
stopwords.update({
    'и', 'в', 'у', 'я', 'а', 'с', 'о', 'от', 'по', 'не', 'до', 'на', 'из',
    'это', 's', 'х',
})

# str.join raises TypeError on None/NaN, so skip rows without tags.
words = ' '.join(df['tags'].dropna().astype(str))

color_array = ['#FBBA72','#DDAE7E','#93A3B1','#FEB95F','#7C898B','#EF6F6C']

def color_func(word, font_size, position, orientation, random_state=None,**kwargs):
    """Pick a random palette colour for each word (signature required by WordCloud)."""
    return random.choice(color_array)


wordcloud = WordCloud(width=1600, height=1140,
                contour_width=0,
                background_color='#282828',
                color_func=color_func,
                stopwords=stopwords,
                min_font_size=10).generate(words)

plt.figure(figsize = (20, 10), facecolor = None)
plt.imshow(wordcloud)
plt.axis("off")
plt.tight_layout(pad = 0)

plt.show()