In [5]:
%matplotlib inline

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

sns.set(style="darkgrid")

def read_csv(df):
    Team='ARC'
    Team='ARC_neu'
#Team='LPS'

    if Team == 'ARC':
        df = pd.read_csv('input/20190521_Board_ARC_Input_für_J_Datum_rckwrts.csv', sep=';', 
                         header=4, usecols=[0,1,4,5,6,7], parse_dates=[3,4,5], 
                         dayfirst=True,  #für deutsches Datumsformat
                         encoding='iso8859_15')
        df.columns = ['art', 'titel', 'plan', 'next', 'input', 'out']
        
    if Team == 'ARC_neu':
        df = pd.read_csv('input/20190719ErgebnisseKanbanBoardPAM-EA.csv', sep=',', header=1, usecols=[0,2,3,4,5], parse_dates=[2,3,4], encoding='iso8859_15')
        df.columns = ['PK', 'art', 'next', 'input', 'out']
        Team='ARC'
    
    if Team == 'LPS':
        df = pd.read_csv('input/20190410LPS_CFD.csv', sep=';', usecols=[9,11,18], parse_dates=[1,2], encoding='iso8859_15')
        df.columns = ['art', 'input', 'out']
#end if TEAM
#enddef  read_csv

read_csv(df)

feiertage = ['2018-10-03', # Tag der Einheit
             '2018-11-01', # Allerheiligen
             '2018-12-24', # Heiligabend
             '2018-12-25', # Weihnachten
             '2018-12-26', # Weihnachten
             '2018-12-31', # Silvester
             '2019-01-01', # Neujahr
             '2019-02-28', # Wieverfastelovend
             '2019-03-04', # Rosenmontag
             '2019-04-19', # Karfreitag
             '2019-04-22', # Ostermontag
             '2019-05-01', # 1. Mai
             '2019-05-30', # Christi Himmelfahrt
             '2019-06-10', # Pfingst-Montag
             '2019-06-20'  # Fronleichnam
            ]

# astype('datetime64[D]') notwendig
dauer=np.busday_count(df.input.values.astype('datetime64[D]'), 
                  df.out.values.astype('datetime64[D]'), 
                    holidays=feiertage)+1

df['dauer'] = dauer

von=(min(df.input))
von=von.strftime('%d.%m.%Y')

bis=(max(df.out))
bis=bis.strftime('%d.%m.%Y')

def calc_8090_percentile(ldf, art, von, bis):
    perc80 = np.percentile(ldf.dauer,80,interpolation='lower') # lower ist richtig
    perc90 = np.percentile(ldf.dauer,90,interpolation='lower')
    return (perc80, perc90)
#end 8090_percentile
    

def plot_leadtime(ldf, art, von, bis):
    perc80 = np.percentile(ldf.dauer,80,interpolation='lower')
    perc90 = np.percentile(ldf.dauer,90,interpolation='lower')
    
    # gca stands for 'get current axis'
    ax = plt.gca()
    #hist zählt direkt die Elemente
    ldf.plot(kind='hist', y='dauer',bins=max(dauer),rwidth=1, color='black', ax=ax, label='')
    
    text80 = '80% fertig in ' + str(perc80) + ' Tagen'
    # vertical dotted line originating at mean value
    plt.axvline(perc80+.2, linestyle='-.', linewidth=1, color='blue', label=text80)

    text90 = '90% fertig in ' + str(perc90) + ' Tagen'
    # vertical dotted line originating at mean value
    plt.axvline(perc90+.2, linestyle='--', linewidth=1, color='red', label=text90)

    # Beschriftung
    plt.suptitle(art)
    
    title = von +' bis ' + bis + ': ' + str(len(ldf)) + ' Zettel'
    plt.title(title, fontsize=12)
    plt.xlabel('Dauer in Tagen')
    plt.ylabel('Anzahl')
    plt.legend(loc=0)
    
    # yticks ändern, so dass der Mindestabstand 1 ist
    ymin, ymax = plt.ylim()
    ax.yaxis.set_major_locator(plt.MultipleLocator(int(ymax/10+1)))    
    
    # x-Achse ab 0
    xmin, xmax = plt.xlim()
    plt.xlim(0, xmax)
    
    
    #plt.show()
    filename='out/'+ Team + '_' + art +'.png'
    plt.savefig(filename, dpi=300)
    plt.close()
#enddef plot_leadtime()

von=(min(df.input))
von=von.strftime('%d.%m.%Y')

bis=(max(df.out))
bis=bis.strftime('%d.%m.%Y')

# muss kein DataFrame sein, ist meine komplizierte, aber laufende Lösung
arten = pd.DataFrame(columns=['Anzahl', 'Art'])
arten = arten.astype( dtype={'Anzahl': int, 'Art' : str})


for gew_art in df.art.drop_duplicates():
    gewaehlt = df.art == gew_art
#   gewaehlt = df.input > '2018-12-31'
    plot_leadtime(df[gewaehlt], gew_art, von, bis)
    laenge = len(df[gewaehlt])
    print gew_art, laenge
    arten.loc[len(arten)] = [int(laenge), gew_art]
#endfor gew_art in ....
print arten

plot_leadtime(df, 'Alle', von, bis)

NameError: name 'df' is not defined

In [42]:
def plot_turbulence(ldf, art, von, bis):
    
    #alle Arbeitstage von ... bis
    bdate = pd.bdate_range(von, bis, holidays=feiertage, freq='C')
    Tage = bdate.strftime('%d.%m.%y')

    # erste Spalte des neuen Dataframes tur
    series = pd.DataFrame(bdate, columns=['days'])
    series['Tage'] = Tage
    
    series['lfd'] = np.arange(1, len(series)+1)
    print series.lfd

    # number of pull-actions (System liquidity)
    i = 0
    sum = 0
    col = np.zeros(len(series), int) 
    for day in series.days:
        sum = len(df[df.out == day])
        sum += len(df[df.input == day])
        col[i] = sum
        i += 1
    #endfor day in series.days
    series['pulls'] = col
    
    sum_pulls = np.sum(col)
    
    first = np.gradient(series.pulls)
    
    # 2nd deviation = Volatility
    
    vol = np.gradient(first)
    #print (vol)
    series['volatility'] = vol
    
    third = np.gradient(vol)
    #series['third'] = third
    
    # 2nd 2nd deviation = turbulence
    tur = np.gradient(third)
    #print (tur)
    series['volatility_of_volatility'] = tur
    
    from scipy import stats 
    series['fourth_moment'] = stats.moment(col, moment = 4)
    
    # Turbulence as in Swiftkanban
    frequency = np.zeros(max(col)+1, int) 
    for i in range (0, max(col)+1):
        frequency[i] = len (col[col == i])
        # print i, frequency[i]
       
    wahrscheinlichkeit = np.zeros(len(series), dtype = float) 

    i = 0
    for p in series.pulls:
        wahrscheinlichkeit[i] = frequency[p] 
        i += 1
        
            
#    print wahrscheinlichkeit
#    print np.mean(col)
    
    #real_turb = (col - np.mean(col))
    real_turb = np.power(col - np.mean(col), 4) * wahrscheinlichkeit
    print real_turb 
    #/ sum_pulls
   
    series['turb'] = real_turb 
    #/ sum_pulls

    #gewaehltes y
    gew_y = 6
    print (gew_y)
    
    #ax=tur.plot(kind='bar', stacked=True,  linewidth=.1 , x='Tage', y=[2,3])
    ax=series.plot(kind='line' , x='Tage', y=[gew_y])
    
    #nur jeden xstep-ten Wert plotten auf x-achse
    xstep = 10
    xindex = np.arange(0, len(series), xstep, int)
    
    einigeTage=Tage[xindex]
    ax.xaxis.set_major_locator(plt.MultipleLocator(xstep)) 
    ax.set_xticklabels(labels=einigeTage)
    
    # integers for y-Axis
    from matplotlib.ticker import MaxNLocator
    ax.yaxis.set_major_locator(MaxNLocator(integer=True))
    
    filename='turbulence/'+ Team + '_' + art + '_' + str(gew_y) +'.png'
    #filename='turbulence/'+ Team + '_' + art + '.png'

    #ax.autoscale_view()
    plt.setp(plt.gca().get_xticklabels(), rotation=45, horizontalalignment='right', size = 5) 
    
    # Beschriftung
    #plt.suptitle('Turbulenz von Team ' + Team)
    #Subtitle = 'Pulls'
   
    #title = von +' bis ' + bis + ': ' + str(len(ldf)) + ' Zettel'
    #title =  str(len(ldf)) + ' Zettel'
    title = 'Auswertung von Team ' + Team
       
    plt.title(title)
     #plt.ylabel('Anzahl')
    plt.xlabel('')
    
    plt.legend(loc=0)   
    
    # noch Testen
    # x-Achse sinnvoll verschieben!! Jeweils manuel anzupassen!!!
    xmin, xmax = plt.xlim()
    
    #plt.xlim(xmin+4, xmax-22)    
    
    #plt.ylim(-12,12)
    
    plt.savefig(filename, dpi=300)
    plt.close()

#end plot_turbulence

plot_turbulence(df, 'Turbulence', '2018-08-21', '2019-05-04')

0        1
1        2
2        3
3        4
4        5
5        6
6        7
7        8
8        9
9       10
10      11
11      12
12      13
13      14
14      15
15      16
16      17
17      18
18      19
19      20
20      21
21      22
22      23
23      24
24      25
25      26
26      27
27      28
28      29
29      30
      ... 
142    143
143    144
144    145
145    146
146    147
147    148
148    149
149    150
150    151
151    152
152    153
153    154
154    155
155    156
156    157
157    158
158    159
159    160
160    161
161    162
162    163
163    164
164    165
165    166
166    167
167    168
168    169
169    170
170    171
171    172
Name: lfd, Length: 172, dtype: int64
[1.86457821e+02 1.71392768e+03 1.71392768e+03 2.23774985e+03
 2.66608107e+01 9.99756567e-06 1.01022556e+03 5.43234703e+02
 1.58114027e+03 5.43234703e+02 1.71392768e+03 2.66608107e+01
 2.01860704e+01 2.66608107e+01 3.83797280e+03 1.01167871e+04
 1.32923580e+04 2.44113285e+03 3.87360649e+04 9.

In [6]:
def plot_cfd(ldf, art, von, bis):
    
    #alle Arbeitstage von ... bis
    bdate = pd.bdate_range(von, bis, holidays=feiertage, freq='C')
    Tage = bdate.strftime('%d.%m.%y')

    # erste Spalte des neuen Dataframes cfd
    cfd = pd.DataFrame(bdate, columns=['days'])
    cfd['Tage'] = Tage

    i = 0
    sum = 0
    col = np.zeros(len(cfd), int) 
    for day in cfd.days:
        sum += len(df[df.out == day])
        col[i] = sum
        i += 1
    #endfor day in cfd.days
    cfd['fertig'] = col

    i = 0
    sum = 0
    col = np.zeros(len(cfd), int) 
    for day in cfd.days:
        sum += len(df[df.input == day])
#         print str(day), len(df[df.input == day]), len(df[df.out == day])
        sum -= len(df[df.out == day])
        col[i] = sum
        if (col[i] < 0): 
            #             print "neg", str(day), str(col[i])
                col[i] = 0
                
        i += 1
    #endfor day in cfd.days
    
    cfd['Umsetzung'] = col
        
    ax=cfd.plot(kind='bar', stacked=True,  linewidth=.1 , x='Tage', y=[2,3])
    
    #nur jeden xstep-ten Wert plotten
    xstep = 10
    xindex = np.arange(2, len(cfd), xstep, int)
    
    einigeTage=Tage[xindex]
    ax.xaxis.set_major_locator(plt.MultipleLocator(xstep)) 
    ax.set_xticklabels(labels=einigeTage)
    
    filename='out/'+ Team + '_' + art +'.png'

    #ax.autoscale_view()
    plt.setp(plt.gca().get_xticklabels(), rotation=45, horizontalalignment='right', size = 5) 
    
    # Beschriftung
    plt.suptitle('CFD von Team ' + Team)
   
    title = von +' bis ' + bis + ': ' + str(len(ldf)) + ' Zettel'
    plt.title(title)
    plt.ylabel('Anzahl')
    plt.xlabel('')
    
    plt.legend(loc=0)
    
    # noch Testen
    # x-Achse sinnvoll verschieben!! Jeweils manuel anzupassen!!!
    xmin, xmax = plt.xlim()
    plt.xlim(xmin+4, xmax-22)    
    
    plt.savefig(filename, dpi=300)
    plt.close()

#end plot_cfd

plot_cfd(df, 'CFD', von, bis)



TypeError: Empty 'DataFrame': no numeric data to plot

In [4]:
def plot_art(ldf, von, bis):
        

IndentationError: expected an indented block (<ipython-input-4-c9158f5912bb>, line 2)

In [5]:
# Breite des Intervals für den "gleitenden Durchschnitt" in Tagen
#breite = 

#alle Arbeitstage von ... bis
data = pd.bdate_range(von, bis, holidays=feiertage, freq='C')

# erste Spalte des neuen Dataframes perc
perc = pd.DataFrame(data, columns=['days'])

In [6]:
i = 0
p80 = np.zeros(len(perc), int) 
for day in perc.days:
    gewaehlt = df.input > day
    if (len (df.dauer[gewaehlt]) != 0):
        p80[i]  = np.percentile(df.dauer[gewaehlt],90,interpolation='lower')
    i = i + 1
#endfor day in perc.days

perc['Alle'] = p80

for gew_art in df.art.drop_duplicates():
    i = 0
    p80 = np.zeros(len(perc), int) 
    gewaehlt = df.art == gew_art
    for day in perc.days:
        gewaehlt &= df.input > day
        if (len (df.dauer[gewaehlt]) != 0):
            p80[i]  = np.percentile(df.dauer[gewaehlt],90,interpolation='lower')
        i = i + 1
    #endfor day in perc.days
 #   perc[gew_art] = p80
#endfor gew_art in ...
perc.tail()

Unnamed: 0,days,Alle
249,2019-05-29,0
250,2019-05-31,0
251,2019-06-03,0
252,2019-06-04,0
253,2019-06-05,0


In [None]:

perc.plot(x='days')

In [None]:
perc.plot(x='days')
filename='out/'+ 'perc_over_time' +'.png'
plt.savefig(filename, dpi=300)
plt.close()

In [None]:
fig1, ax1 = plt.subplots()
ax1.pie(arten.Anzahl,  autopct='%1.1f%%', labels=arten.Art,
        startangle=90)
ax1.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.

plt.show()


In [None]:
arten = arten.sort_values(by='Anzahl', ascending=False)

fig, ax = plt.subplots(figsize=(16, 8), subplot_kw=dict(aspect="equal"))

# Absolutwerte und Prozente
def func(pct, allvals):
    absolute = int(pct/100.*np.sum(allvals)+.5)
    return "{:.0f}%\n({:d})".format(pct, absolute)


wedges, texts, autotexts = ax.pie(arten.Anzahl, autopct=lambda pct: func(pct, arten.Anzahl),
                                  textprops=dict(color="w"), counterclock=False, startangle = 150)

ax.legend(wedges, arten.Art,
          title="Arten der Arbeit",
          loc="center left",
          bbox_to_anchor=(1, 0, 0.5, 1))

plt.setp(autotexts, size=10, weight="bold")

ax.set_title(Team + ": Verteilung nach Arten der Arbeit")

#plt.show()
art='Arten_der_Arbeit'
filename='out/'+ Team + '_' + art +'.png'
plt.savefig(filename, dpi=300)
plt.close()