In [3]:
import pandas as pd
import numpy as np

import plotly.plotly as py
from plotly import __version__
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from plotly.graph_objs import Scatter, Figure, Layout
import plotly.graph_objs as go
from matplotlib import pyplot as plt
import operator
import seaborn as sns

from fbprophet import Prophet

# Initialise plotly's offline notebook mode. Per the plotly documentation,
# connected=True loads plotly.js from a CDN instead of embedding it in the
# notebook, so rendering inline figures needs network access.
init_notebook_mode(connected=True)
# Display matplotlib figures inline in the cell output.
%matplotlib inline

# 3-D cluster visualization

In [4]:
# Load the clustering results (semicolon-separated, cp1251-encoded CSV)
# and preview the first row to check that the columns were parsed.
def_df = pd.read_csv(
    'results/clusters.csv',
    sep=';',
    encoding='cp1251',
)
def_df.head(1)

Unnamed: 0,Причина забраковки,Тип_%,Зарещен_%,Лаборатория_%,cluster,cluster_label
0,буферная стадия,0.4,1.0,0.8,2,1


In [8]:
# Fix the misspelled column name coming from the CSV header. Reassigning
# instead of mutating in place keeps the cell idempotent and avoids
# hidden-state surprises when cells are re-run.
def_df = def_df.rename(columns={'Зарещен_%': 'Запрещен_%'})

In [9]:
# Feature columns used as the x, y and z axes of the 3-D cluster plot
# (in that order).
clustering_columns = [
    'Тип_%',
    'Запрещен_%',
    'Лаборатория_%',
]

In [10]:
# Visualize the clusters in 3-D: one Scatter3d trace per cluster, so every
# cluster gets its own legend entry. The figure is written to a standalone
# HTML file.
x_col, y_col, z_col = clustering_columns[:3]

data1 = []
# Group by a scalar key rather than a one-element list: with a list-like
# `by`, pandas >= 2.0 yields 1-tuples as group keys, which are not valid
# trace names.
for cluster, cluster_group in def_df.groupby('cluster'):
    trace = go.Scatter3d(
        x=cluster_group[x_col],
        y=cluster_group[y_col],
        z=cluster_group[z_col],
        mode='markers',
        marker=dict(
            size=12,
            line=dict(width=0.5),
            opacity=0.8
        ),
        # The legend label must be a string; the cluster id is numeric.
        name=str(cluster)
    )
    data1.append(trace)

layout = go.Layout(
    title='Кластеризация',
    scene=dict(
        xaxis=dict(title=x_col),
        yaxis=dict(title=y_col),
        zaxis=dict(title=z_col)
    ),
    # No margins: let the 3-D scene fill the whole page.
    margin=dict(l=0, r=0, b=0, t=0)
)

fig = go.Figure(data=data1, layout=layout)
plot(fig, filename='results/pics/def_clustering.html', show_link=False)

'file://C:\\Users\\ndmel\\Desktop\\aihack\\Protek\\results\\pics\\def_clustering.html'

# Time series: monthly and yearly counts per manufacturer

In [None]:
# Order the rows by manufacturer and then by document date. Reassigning
# instead of sorting in place keeps the cell idempotent and avoids
# SettingWithCopy issues if `data` is a slice of another frame.
data = data.sort_values(['Производитель', 'Дата документа'])

In [None]:
# Monthly row counts per manufacturer, extended with a Prophet forecast.
# One standalone HTML chart is written per manufacturer.

# Title font shared by both axes of every chart.
axis_title_font = dict(
    family='Courier New, monospace',
    size=18,
    color='#7f7f7f'
)

# Group by a scalar key rather than a one-element list: with a list-like
# `by`, pandas >= 2.0 yields 1-tuples as group keys, and the file name below
# needs `manufacturer` to be a plain string.
for manufacturer, group in data.groupby('Производитель'):

    # Rows per month, in the two-column (ds, y) layout passed to Prophet.
    series = group.groupby('Месяц документа').size().reset_index()
    series.columns = ['ds', 'y']

    # Skip manufacturers with fewer than 12 distinct months of history.
    if series['ds'].unique().shape[0] < 12:
        continue

    # predict
    my_model = Prophet()
    my_model.fit(series)
    # Forecast grid: 12 points spaced 30 days apart, starting at the last
    # observed date (offset 0). pd.Timedelta is used because `datetime` is
    # not imported in this notebook.
    last_date = series['ds'].max()
    future_dates = pd.DataFrame(
        [last_date + pd.Timedelta(days=offset) for offset in range(0, 12 * 30, 30)],
        columns=['ds']
    )
    forecast = my_model.predict(future_dates)

    # plot
    trace = go.Scatter(
        x=pd.DatetimeIndex(series['ds'].values),
        y=series['y'].values,
        marker=dict(color='rgb(85, 85, 85)'),
        name='Факт'
    )
    trace1 = go.Scatter(
        x=pd.DatetimeIndex(future_dates['ds'].values),
        # Counts cannot be negative, so clamp the forecast at zero.
        y=[value if value > 0 else 0 for value in forecast['yhat']],
        marker=dict(color='rgb(18, 79, 166)'),
        name='Предсказание'
    )
    data1 = [trace, trace1]
    layout = go.Layout(
        title="Производитель '%s' изъятия по месяцам" % manufacturer,
        xaxis=dict(title='Дата', titlefont=axis_title_font),
        yaxis=dict(title='Изъятия, раз', titlefont=axis_title_font)
    )
    fig = go.Figure(data=data1, layout=layout)

    # '/' would be interpreted as a path separator, so drop it from the name.
    plot(
        fig,
        filename="pics/manufacturer '%s' reports.html" % manufacturer.replace('/', ''),
        show_link=False,
        auto_open=False
    )

In [None]:
# Yearly row counts per manufacturer as a bar chart. One standalone HTML
# chart is written per manufacturer.

# Title font shared by both axes of every chart.
axis_title_font = dict(
    family='Courier New, monospace',
    size=18,
    color='#7f7f7f'
)

# Group by a scalar key rather than a one-element list: with a list-like
# `by`, pandas >= 2.0 yields 1-tuples as group keys, and the file name below
# needs `manufacturer` to be a plain string.
for manufacturer, group in data.groupby('Производитель'):

    # Rows per year as a two-column (ds, y) frame.
    series = group.groupby('Год документа').size().reset_index()
    series.columns = ['ds', 'y']

    # Only chart manufacturers with more than two distinct years of data.
    if series.shape[0] <= 2:
        continue

    # plot
    trace = go.Bar(
        x=series['ds'].values,
        y=series['y'].values,
        name='Факт'
    )

    data1 = [trace]
    layout = go.Layout(
        title="Производитель '%s' изъятия по годам" % manufacturer,
        xaxis=dict(title='Дата', titlefont=axis_title_font),
        yaxis=dict(title='Изъятия, раз', titlefont=axis_title_font)
    )
    fig = go.Figure(data=data1, layout=layout)

    # '/' would be interpreted as a path separator, so drop it from the name.
    plot(
        fig,
        filename="pics/year/manufacturer '%s' reports.html" % manufacturer.replace('/', ''),
        show_link=False,
        auto_open=False
    )

In [None]:
# Order the rows by manufacturer and then by document date, then preview the
# first row. Reassigning instead of sorting in place keeps the cell
# idempotent and avoids SettingWithCopy issues if the frame is a slice.
reestr_place = reestr_place.sort_values(['Производитель', 'Дата документа'])
reestr_place.head(1)

In [None]:
# Monthly row counts per manufacturer from `reestr_place`, extended with a
# Prophet forecast. One standalone HTML chart is written per manufacturer.

# Title font shared by both axes of every chart.
axis_title_font = dict(
    family='Courier New, monospace',
    size=18,
    color='#7f7f7f'
)

# Group by a scalar key rather than a one-element list: with a list-like
# `by`, pandas >= 2.0 yields 1-tuples as group keys, and the file name below
# needs `manufacturer` to be a plain string.
for manufacturer, group in reestr_place.groupby('Производитель'):

    # Rows per month, in the two-column (ds, y) layout passed to Prophet.
    series = group.groupby('Месяц документа').size().reset_index()
    series.columns = ['ds', 'y']

    # Skip manufacturers with fewer than 12 distinct months of history.
    if series['ds'].unique().shape[0] < 12:
        continue

    # predict
    my_model = Prophet()
    my_model.fit(series)
    # Forecast grid: 12 points spaced 30 days apart, starting at the last
    # observed date (offset 0). pd.Timedelta is used because `datetime` is
    # not imported in this notebook.
    last_date = series['ds'].max()
    future_dates = pd.DataFrame(
        [last_date + pd.Timedelta(days=offset) for offset in range(0, 12 * 30, 30)],
        columns=['ds']
    )
    forecast = my_model.predict(future_dates)

    # plot
    trace = go.Scatter(
        x=pd.DatetimeIndex(series['ds'].values),
        y=series['y'].values,
        marker=dict(color='rgb(85, 85, 85)'),
        name='Факт'
    )
    trace1 = go.Scatter(
        x=pd.DatetimeIndex(future_dates['ds'].values),
        # Counts cannot be negative, so clamp the forecast at zero.
        y=[value if value > 0 else 0 for value in forecast['yhat']],
        marker=dict(color='rgb(18, 79, 166)'),
        name='Предсказание'
    )
    data1 = [trace, trace1]
    layout = go.Layout(
        title="Производитель '%s' изъятия по месяцам" % manufacturer,
        xaxis=dict(title='Дата', titlefont=axis_title_font),
        yaxis=dict(title='Изъятия, раз', titlefont=axis_title_font)
    )
    fig = go.Figure(data=data1, layout=layout)

    # '/' would be interpreted as a path separator, so drop it from the name.
    plot(
        fig,
        filename="pics/manufacturer '%s' letters.html" % manufacturer.replace('/', ''),
        show_link=False,
        auto_open=False
    )