In [1]:
%matplotlib inline

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

sns.set(style="darkgrid")

# Read the board export (CSV).
# Caution: no '/' in the column headings, because they are used in names later on.
# This assumes the file contains no errors and that every field is filled in correctly;
# use the notebooks LPS_first / ARC_first to check a new export first.

# Team selects the input file and the prefix of all output files; alternative: 'ARC'
Team = 'LPS'

if Team == 'ARC':
    df = pd.read_csv('input/20190521_Board_ARC_Input_für_J_Datum_rckwrts.csv', sep=';',
                     header=4, usecols=[0,1,4,5,6,7], parse_dates=[3,4,5], encoding='iso8859_15')
    df.columns = ['art', 'titel', 'plan', 'next', 'input', 'out']
else:
    df = pd.read_csv('input/20190410LPS_CFD.csv', sep=';', usecols=[9,11,18], parse_dates=[1,2], encoding='iso8859_15')
    df.columns = ['art', 'input', 'out']
#end if Team

# Days that are excluded from the working-day count (besides weekends).
feiertage = ['2018-10-03', # German Unity Day
             '2018-11-01', # All Saints' Day
             '2018-12-24', # Christmas Eve
             '2018-12-25', # Christmas
             '2018-12-26', # Christmas
             '2018-12-31', # New Year's Eve
             '2019-01-01', # New Year's Day
             '2019-02-28', # Weiberfastnacht (carnival Thursday)
             '2019-03-04', # Rose Monday
             '2019-04-19', # Good Friday
             '2019-04-22', # Easter Monday
             '2019-05-01', # May Day
             '2019-05-30', # Ascension Day
             '2019-06-10', # Whit Monday
             '2019-06-20'  # Corpus Christi
            ]

# np.busday_count works on day precision, hence the cast to datetime64[D].
# It counts the days in [input, out), so +1 makes a ticket that is finished
# on its input day count as one day.
dauer = np.busday_count(df.input.values.astype('datetime64[D]'),
                        df.out.values.astype('datetime64[D]'),
                        holidays=feiertage) + 1

df['dauer'] = dauer

# Covered date range as 'dd.mm.YYYY' strings (used in plot titles).
# Series.min()/.max() skip missing dates (NaT); the builtin min()/max() compare
# element by element and give order-dependent results when NaT is present.
von = df.input.min().strftime('%d.%m.%Y')
bis = df.out.max().strftime('%d.%m.%Y')

def _lower_percentile(values, q):
    """Return the q-th percentile of ``values`` without interpolating.

    The result is always an element of ``values`` (the next lower one when the
    percentile falls between two data points).
    """
    try:
        # NumPy >= 1.22 calls the keyword `method`
        return np.percentile(values, q, method='lower')
    except TypeError:
        # older NumPy releases only know the former keyword name
        return np.percentile(values, q, interpolation='lower')


def calc_8090_percentile(ldf, art, von, bis):
    """Return the 80th and 90th percentile of the lead time in ``ldf``.

    Parameters
    ----------
    ldf : DataFrame with a numeric column ``dauer``.
    art, von, bis : not used; accepted so the signature matches the plot functions.

    Returns
    -------
    tuple ``(perc80, perc90)``; both values are taken from ``ldf.dauer``
    itself ("lower" percentile, no interpolation between data points).
    """
    perc80 = _lower_percentile(ldf.dauer, 80)
    perc90 = _lower_percentile(ldf.dauer, 90)
    return (perc80, perc90)
#end 8090_percentile
    

def plot_leadtime(ldf, art, von, bis):
    """Save a lead-time histogram of ``ldf`` as ``out/<Team>_<art>.png``.

    The histogram shows how many tickets took how many working days
    (column ``dauer``) and marks the 80th and 90th percentile with
    vertical lines.

    Parameters
    ----------
    ldf : DataFrame with a numeric column ``dauer``.
    art : str, used as figure heading and as part of the file name.
    von, bis : str, start and end of the covered period; only used in the title.

    Reads the module-level ``Team`` for the file name. Returns nothing.
    An empty ``ldf`` is skipped without writing a file.
    """
    if len(ldf) == 0:
        # no tickets -> no percentiles and nothing to draw
        return

    perc80, perc90 = calc_8090_percentile(ldf, art, von, bis)

    fig, ax = plt.subplots()

    # One bin per day of the longest duration in *this* subset, so the
    # histogram counts the tickets per duration directly.
    n_bins = max(1, int(ldf.dauer.max()))
    ldf.plot(kind='hist', y='dauer', bins=n_bins, rwidth=1, color='black', ax=ax, label='')

    # vertical lines at the percentiles (shifted by .2 so they do not sit on a bar edge)
    text80 = '80% fertig in ' + str(perc80) + ' Tagen'
    ax.axvline(perc80+.2, linestyle='-.', linewidth=1, color='blue', label=text80)

    text90 = '90% fertig in ' + str(perc90) + ' Tagen'
    ax.axvline(perc90+.2, linestyle='--', linewidth=1, color='red', label=text90)

    # labelling
    fig.suptitle(art)

    title = von +' bis ' + bis + ': ' + str(len(ldf)) + ' Zettel'
    ax.set_title(title, fontsize=12)
    ax.set_xlabel('Dauer in Tagen')
    ax.set_ylabel('Anzahl')
    ax.legend(loc=0)

    # choose the y tick spacing as a whole number of at least 1
    ymax = ax.get_ylim()[1]
    ax.yaxis.set_major_locator(plt.MultipleLocator(int(ymax/10+1)))

    # x axis starts at 0
    xmax = ax.get_xlim()[1]
    ax.set_xlim(0, xmax)

    filename='out/'+ Team + '_' + art +'.png'
    fig.savefig(filename, dpi=300)
    plt.close(fig)
#enddef plot_leadtime()

# von / bis (covered period as 'dd.mm.YYYY' strings) are already set
# directly after the lead times were computed; they are only read here.

# One lead-time plot per work type, in order of first appearance,
# while collecting the number of tickets per type.
records = []
for gew_art in df.art.drop_duplicates():
    gewaehlt = df.art == gew_art
    auswahl = df[gewaehlt]
    plot_leadtime(auswahl, gew_art, von, bis)
    laenge = len(auswahl)
    # %-formatting prints "type count" identically on Python 2 and 3
    print('%s %s' % (gew_art, laenge))
    records.append((int(laenge), gew_art))
#endfor gew_art in ....

# ticket count per work type; used for the pie charts further down
arten = pd.DataFrame(records, columns=['Anzahl', 'Art'])
print(arten)

plot_leadtime(df, 'Alle', von, bis)

Linux Plattform weiterentwickeln 144
Security 26
Berechtigung verwalten 37
Linux patchen updaten 48
Übergabe alter Themen 22
Linux System zur Verfügung stellen 41
VMWare SAN LUN 5
ITR 18
VMWare verwalten 39
Rhel7 Mig 22
Incident 6
Am Board 19
    Anzahl                                 Art
0      144    Linux Plattform weiterentwickeln
1       26                            Security
2       37              Berechtigung verwalten
3       48               Linux patchen updaten
4       22               Übergabe alter Themen
5       41  Linux System zur Verfügung stellen
6        5                      VMWare SAN LUN
7       18                                 ITR
8       39                    VMWare verwalten
9       22                           Rhel7 Mig
10       6                            Incident
11      19                            Am Board


In [36]:
def plot_turbulence(ldf, art, von, bis):
    """Plot a per-day turbulence measure and save it under ``turbulence/``.

    For every working day between ``von`` and ``bis`` the number of "pulls"
    (tickets of ``ldf`` entering plus tickets leaving on that day) is counted.
    From that series several derived columns are computed; the one selected
    by ``gew_y`` is plotted over the running day number and written to
    ``turbulence/<Team>_<art>_<gew_y>.png``.

    Parameters
    ----------
    ldf : DataFrame with datetime columns ``input`` and ``out``.
    art : str, part of the output file name.
    von, bis : first and last day of the evaluated period; strings in the
        form 'dd.mm.YYYY' are read day-first, anything else is handed to
        ``pd.to_datetime`` unchanged.

    Reads the module-level ``feiertage`` and ``Team``. Prints the turbulence
    values and the selected column index. Returns nothing.
    """
    from matplotlib.ticker import MaxNLocator
    from scipy import stats

    def as_day(value):
        # 'dd.mm.YYYY' would otherwise be parsed month-first for days <= 12
        try:
            return pd.to_datetime(value, format='%d.%m.%Y')
        except (ValueError, TypeError):
            return pd.to_datetime(value)

    # all working days from ... to
    bdate = pd.bdate_range(as_day(von), as_day(bis), holidays=feiertage, freq='C')
    Tage = bdate.strftime('%d.%m.%y')

    # first columns of the new DataFrame
    series = pd.DataFrame(bdate, columns=['days'])
    series['Tage'] = Tage
    series['lfd_no'] = np.arange(1, len(series)+1)

    # number of pull actions per day (system liquidity): finished + started tickets
    col = np.array([int((ldf.out == day).sum()) + int((ldf.input == day).sum())
                    for day in series.days], dtype=int)
    series['Pulls'] = col

    # first derivative of the pulls
    first = np.gradient(col)
    series['First_Deviation'] = first

    # second derivative = volatility
    vol = np.gradient(first)
    series['Volatility'] = vol

    # fourth derivative = volatility of the volatility
    fourth = np.gradient(np.gradient(vol))
    series['Volatility_of_Volatility'] = fourth

    # fourth central moment of the pulls (one scalar, repeated in every row)
    series['Fourth_Moment'] = stats.moment(col, moment = 4)

    # Turbulence "as in Swiftkanban" (per the original author's note):
    # (pulls - mean)^4 weighted with how often that pull count occurs.
    # NOTE(review): the weight is the absolute frequency, not a probability;
    # a division by the total was left disabled -- confirm which is intended.
    frequency = np.bincount(col)
    wahrscheinlichkeit = frequency[col].astype(float)

    real_turb = np.power(col - np.mean(col), 4) * wahrscheinlichkeit
    print(real_turb)

    series['Turbulence'] = real_turb

    # selected y column by position; 8 is the 'Turbulence' column added last
    gew_y = 8
    print(gew_y)

    ax=series.plot(kind='line' , x='lfd_no', y=[gew_y])

    # integers for y-Axis
    ax.yaxis.set_major_locator(MaxNLocator(integer=True))

    filename='turbulence/'+ Team + '_' + art + '_' + str(gew_y) +'.png'

    plt.setp(plt.gca().get_xticklabels(), rotation=45, horizontalalignment='right', size = 5)

    # labelling
    title = 'Auswertung von Team ' + Team

    plt.title(title)
    plt.xlabel('')

    plt.legend(loc=0)

    # TODO (still to be tested): shift the x axis to a sensible range;
    # this has to be adjusted by hand for every data set.

    plt.savefig(filename, dpi=300)
    plt.close()

#end plot_turbulence

plot_turbulence(df, 'Turbulence', '2018-08-21', '2019-05-04')

[1.86457821e+02 1.71392768e+03 1.71392768e+03 2.23774985e+03
 2.66608107e+01 9.99756567e-06 1.01022556e+03 5.43234703e+02
 1.58114027e+03 5.43234703e+02 1.71392768e+03 2.66608107e+01
 2.01860704e+01 2.66608107e+01 3.83797280e+03 1.01167871e+04
 1.32923580e+04 2.44113285e+03 3.87360649e+04 9.99756567e-06
 4.15586017e+03 1.86457821e+02 1.58114027e+03 1.01022556e+03
 2.66608107e+01 9.99756567e-06 5.43234703e+02 2.66608107e+01
 1.86457821e+02 1.01022556e+03 2.66608107e+01 1.71392768e+03
 2.23774985e+03 1.71392768e+03 2.01860704e+01 1.86457821e+02
 5.43234703e+02 5.43234703e+02 1.71392768e+03 2.01860704e+01
 9.99756567e-06 1.58114027e+03 1.58114027e+03 3.83797280e+03
 2.66608107e+01 2.01860704e+01 9.99756567e-06 2.01860704e+01
 2.66608107e+01 1.01022556e+03 2.64259880e+03 5.43234703e+02
 3.83797280e+03 2.01860704e+01 2.66608107e+01 3.83797280e+03
 5.43234703e+02 1.01022556e+03 1.01022556e+03 1.86457821e+02
 1.01022556e+03 2.66608107e+01 2.01860704e+01 1.86457821e+02
 9.99756567e-06 2.666081

In [None]:
def plot_cfd(ldf, art, von, bis, xlim_trim=(4, 22)):
    """Save a cumulative flow diagram of ``ldf`` as ``out/<Team>_<art>.png``.

    For every working day between ``von`` and ``bis`` two stacked bars are
    drawn: the tickets finished so far (``fertig``) and the tickets currently
    in progress (``Umsetzung``, started minus finished, never below 0).

    Parameters
    ----------
    ldf : DataFrame with datetime columns ``input`` and ``out``.
    art : str, part of the output file name.
    von, bis : str, first and last day of the period; also shown in the title.
        Strings in the form 'dd.mm.YYYY' are read day-first, anything else is
        handed to ``pd.to_datetime`` unchanged.
    xlim_trim : (left, right), number of bars cut off at the left and right
        end of the x axis. The default keeps the previously hard-coded values;
        adjust it per data set.

    Reads the module-level ``feiertage`` and ``Team``. Returns nothing.
    """
    def as_day(value):
        # 'dd.mm.YYYY' would otherwise be parsed month-first for days <= 12
        try:
            return pd.to_datetime(value, format='%d.%m.%Y')
        except (ValueError, TypeError):
            return pd.to_datetime(value)

    # all working days from ... to
    bdate = pd.bdate_range(as_day(von), as_day(bis), holidays=feiertage, freq='C')
    Tage = bdate.strftime('%d.%m.%y')

    # first columns of the new DataFrame cfd
    cfd = pd.DataFrame(bdate, columns=['days'])
    cfd['Tage'] = Tage

    # tickets finished / started on each working day
    finished = np.array([int((ldf.out == day).sum()) for day in cfd.days], dtype=int)
    started = np.array([int((ldf.input == day).sum()) for day in cfd.days], dtype=int)

    # running total of finished tickets
    cfd['fertig'] = np.cumsum(finished)

    # work in progress; the running balance itself may go negative (tickets
    # started before `von`), only the displayed value is clipped at 0
    cfd['Umsetzung'] = np.clip(np.cumsum(started - finished), 0, None)

    ax=cfd.plot(kind='bar', stacked=True,  linewidth=.1 , x='Tage', y=['fertig', 'Umsetzung'])

    # Label only every xstep-th bar. Positions and labels are set together so
    # that every label shows the date of the bar it is attached to.
    xstep = 10
    tick_pos = np.arange(0, len(cfd), xstep)
    ax.set_xticks(tick_pos)
    ax.set_xticklabels(Tage[tick_pos])

    filename='out/'+ Team + '_' + art +'.png'

    plt.setp(plt.gca().get_xticklabels(), rotation=45, horizontalalignment='right', size = 5)

    # labelling
    plt.suptitle('CFD von Team ' + Team)

    title = von +' bis ' + bis + ': ' + str(len(ldf)) + ' Zettel'
    plt.title(title)
    plt.ylabel('Anzahl')
    plt.xlabel('')

    plt.legend(loc=0)

    # cut the x axis to the interesting range (see xlim_trim)
    trim_left, trim_right = xlim_trim
    xmin, xmax = plt.xlim()
    plt.xlim(xmin+trim_left, xmax-trim_right)

    plt.savefig(filename, dpi=300)
    plt.close()

#end plot_cfd

plot_cfd(df, 'CFD', von, bis)



In [None]:
def plot_art(ldf, von, bis):
    """Placeholder: this function has no implementation yet.

    The parameters mirror the other plot functions of this notebook.
    Raises NotImplementedError when called.
    """
    # A def without a body is a SyntaxError; fail explicitly instead.
    raise NotImplementedError('plot_art is not implemented yet')

In [None]:
# TODO: width (in days) of the window for the "moving average" is not defined yet.

# All working days from von to bis.
# von/bis are 'dd.mm.YYYY' strings; without an explicit format a date such as
# '10.04.2019' would be read month-first (as 4 October).
data = pd.bdate_range(pd.to_datetime(von, format='%d.%m.%Y'),
                      pd.to_datetime(bis, format='%d.%m.%Y'),
                      holidays=feiertage, freq='C')

# first column of the new DataFrame perc
perc = pd.DataFrame(data, columns=['days'])

In [None]:
def _p90_after_each_day(frame, days):
    """90th percentile ("lower") of ``frame.dauer`` for the tickets entered after each day.

    Returns an int array with one value per entry of ``days``; days without
    any later ticket keep the value 0.
    """
    result = np.zeros(len(days), int)
    for pos, day in enumerate(days):
        later = frame[frame.input > day]
        if len(later) != 0:
            # calc_8090_percentile returns (80th, 90th) and ignores its other arguments
            result[pos] = calc_8090_percentile(later, None, None, None)[1]
    return result


# NOTE(review): this is the 90th percentile -- confirm that 90 (not 80) is intended.
perc['Alle'] = _p90_after_each_day(df, perc.days)

# The same series per work type. They are collected separately and are not
# added to `perc`, so plots of `perc` show only the 'Alle' column.
# TODO confirm whether the per-type columns should be plotted as well.
perc_by_art = {}
for gew_art in df.art.drop_duplicates():
    perc_by_art[gew_art] = _p90_after_each_day(df[df.art == gew_art], perc.days)
#endfor gew_art in ...

perc.tail()

In [None]:

# line chart of every column in perc over the working days
perc.plot.line(x='days')

In [None]:
# Draw perc over the working days and write the figure to disk.
filename = 'out/' + 'perc_over_time' + '.png'
perc.plot.line(x='days')
plt.savefig(filename, dpi=300)
plt.close()

In [None]:
# Simple pie chart: share of tickets per work type, labelled with percentages.
fig1, ax1 = plt.subplots()
pie_options = dict(labels=arten.Art, autopct='%1.1f%%', startangle=90)
ax1.pie(arten.Anzahl, **pie_options)
# equal aspect ratio so that the pie is drawn as a circle
ax1.axis('equal')

plt.show()


In [None]:
# Largest work types first; the pie below draws its wedges in this order.
arten = arten.sort_values('Anzahl', ascending=False)

fig, ax = plt.subplots(subplot_kw={'aspect': 'equal'}, figsize=(16, 8))

# absolute values and percentages
def func(pct, allvals):
    """Format a pie label as "<percent>%" plus the absolute count in parentheses.

    ``pct`` is the wedge's share in percent, ``allvals`` are all wedge values;
    the count is recovered from the share and rounded half up.
    """
    share = pct / 100.
    count = int(share * np.sum(allvals) + .5)
    return "{:.0f}%\n({:d})".format(pct, count)


# Pie of the ticket counts; every wedge is labelled with percentage and count,
# the work-type names go into a legend right of the pie.
pie_parts = ax.pie(arten.Anzahl,
                   autopct=lambda pct: func(pct, arten.Anzahl),
                   textprops={'color': "w"},
                   counterclock=False,
                   startangle=150)
wedges, texts, autotexts = pie_parts

legend_options = dict(title="Arten der Arbeit",
                      loc="center left",
                      bbox_to_anchor=(1, 0, 0.5, 1))
ax.legend(wedges, arten.Art, **legend_options)

plt.setp(autotexts, size=10, weight="bold")

ax.set_title(Team + ": Verteilung nach Arten der Arbeit")

# write the figure to out/<Team>_Arten_der_Arbeit.png instead of showing it
art = 'Arten_der_Arbeit'
filename = 'out/' + Team + '_' + art + '.png'
plt.savefig(filename, dpi=300)
plt.close()