In [1]:
import numpy as np
import pandas as pd

In [2]:
# Remote CSV to load; read_csv fetches it over the network.
url = ('https://github.com/mattharrison/datasets/raw/master/data/'
       'siena2018-pres.csv')
# index_col=0: the first CSV column becomes the row index.
df = pd.read_csv(url, index_col=0)

In [3]:
def tweak_siena_pres(df):
    """Tidy the raw presidential-ranking frame via a single method chain.

    Steps (the numbers match the inline markers below):
      1. rename ``Seq.`` to ``Seq`` and expand the abbreviated column codes
         into descriptive names with spaces replaced by underscores;
      2. store ``Party`` as a categorical;
      3. cast every int64 column to uint8;
      4. add ``Average_rank`` (dense rank of each row's sum over the uint8
         columns, stored as uint8) and ``Quartile`` (``pd.qcut`` of that rank
         into four bins labelled 1st..4th).

    Parameters
    ----------
    df : pandas.DataFrame
        Raw frame; must contain a ``Party`` column. Abbreviated columns that
        are absent are simply not renamed.

    Returns
    -------
    pandas.DataFrame
        The transformed frame produced by the chain.
    """
    long_names = {
        'Bg': 'Background',
        'PL': 'Party leadership', 'CAb': 'Communication ability',
        'RC': 'Relations with Congress', 'CAp': 'Court appointments',
        'HE': 'Handling of economy', 'L': 'Luck',
        'AC': 'Ability to compromise', 'WR': 'Willing to take risks',
        'EAp': 'Executive appointments', 'OA': 'Overall ability',
        'Im': 'Imagination', 'DA': 'Domestic accomplishments',
        'Int': 'Integrity', 'EAb': 'Executive ability',
        'FPA': 'Foreign policy accomplishments',
        'LA': 'Leadership ability',
        'IQ': 'Intelligence', 'AM': 'Avoid crucial mistakes',
        'EV': "Experts' view", 'O': 'Overall',
    }
    # Column labels should be attribute-friendly: no embedded spaces.
    underscored = {abbr: name.replace(' ', '_')
                   for abbr, name in long_names.items()}

    def int64_to_uint8(df_):
        # Shrink every int64 column to a single unsigned byte.
        int_cols = df_.select_dtypes('int64').columns
        return df_.astype(dict.fromkeys(int_cols, 'uint8'))

    def average_rank(df_):
        # Row totals over the uint8 columns, then dense-ranked.
        totals = df_.select_dtypes('uint8').sum(axis=1)
        return totals.rank(method='dense').astype('uint8')

    def quartile(df_):
        # Evaluated after Average_rank exists (assign processes kwargs in order).
        return pd.qcut(df_.Average_rank, 4,
                       labels='1st 2nd 3rd 4th'.split())

    return (
        df
        .rename(columns={'Seq.': 'Seq'})      # 1
        .rename(columns=underscored)
        .astype({'Party': 'category'})        # 2
        .pipe(int64_to_uint8)                 # 3
        .assign(Average_rank=average_rank,    # 4
                Quartile=quartile)
    )

In [4]:
# Run the cleanup pipeline on the raw frame; the cells below work on `pres`.
pres = tweak_siena_pres(df)

In [5]:
# Iterate over columns: each step yields a (column name, Series) tuple.
# DataFrame.iteritems() was deprecated in pandas 1.5 and removed in 2.0;
# DataFrame.items() is the equivalent and works on both old and new versions.
for col_name, col in pres.items():
    print(col_name, type(col))
    break  # the first column is enough for the demonstration

Seq <class 'pandas.core.series.Series'>


In [6]:
# Iterate over rows: each step yields an (index label, row as a Series) tuple.
for idx, row in pres.iterrows():
    print(idx, type(row))
    break  # show only the first row

1 <class 'pandas.core.series.Series'>


In [7]:
# Iterate over rows as namedtuples: position 0 holds the index label and the
# columns are reachable as attributes (e.g. tup.Party).
for tup in pres.itertuples():
    print(tup[0], tup.Party)
    break  # show only the first row

1 Independent


In [8]:
# Label-based column slice from 'Background' through 'Average_rank'
# (.loc label slices include both endpoints); every row is kept.
scores = pres.loc[:, 'Background':'Average_rank']

In [9]:
# Row-wise mean computed by hand: sum across the columns of each row, divided
# by the number of columns in `scores`.
scores.sum(axis='columns') / len(scores.columns)

1      3.681818
2     14.454545
3      6.545455
4      9.636364
5     10.454545
6     17.181818
7     19.590909
8     25.681818
9     36.909091
10    34.409091
11    13.318182
12    29.500000
13    37.454545
14    39.409091
15    42.000000
16     4.045455
17    42.272727
18    24.227273
19    30.136364
20    27.272727
21    31.454545
22    22.181818
23    32.818182
24    19.727273
25     5.227273
26    21.318182
27    13.590909
28    38.772727
29    29.909091
30    31.954545
31     3.909091
32    11.818182
33     9.227273
34    12.727273
35    15.272727
36    26.909091
37    26.000000
38    26.818182
39    14.545455
40    20.818182
41    14.636364
42    30.363636
43    15.818182
44    39.772727
dtype: float64

In [10]:
# Mix built-in aggregation names with a custom lambda; the lambda picks the
# value stored under index label 1 in each column.
# NOTE(review): the echoed source line in the output below looks like the tail
# of a warning, and 'sum' shows no value for Party/Quartile there -- confirm
# this cell still runs cleanly on the pandas version in use.
pres.agg(['count', 'size', 'sum', lambda col: col.loc[1]])

  pres.agg(['count', 'size', 'sum', lambda col: col.loc[1]])


Unnamed: 0,Seq,President,Party,Background,Imagination,Integrity,Intelligence,Luck,Willing_to_take_risks,Ability_to_compromise,...,Court_appointments,Handling_of_economy,Executive_appointments,Domestic_accomplishments,Foreign_policy_accomplishments,Avoid_crucial_mistakes,Experts'_view,Overall,Average_rank,Quartile
count,44,44,44,44,44,44,44,44,44,44,...,44,44,44,44,44,44,44,44,44,44
size,44,44,44,44,44,44,44,44,44,44,...,44,44,44,44,44,44,44,44,44,44
sum,12345678910111213141516171819202122/2423252627...,George WashingtonJohn AdamsThomas JeffersonJam...,,968,957,990,990,990,953,968,...,990,990,990,990,990,990,990,990,990,
<lambda>,1,George Washington,Independent,7,7,1,10,1,6,2,...,1,1,1,2,2,1,2,1,1,1st


In [11]:
# A dict maps each column to its own list of aggregations; combinations that
# were not requested for a column are left empty in the result.
pres.agg({'Luck': ['count', 'size'], 'Overall': ['count', 'max']})

Unnamed: 0,Luck,Overall
count,44.0,44.0
size,44.0,
max,,44.0


In [12]:
# Named aggregation: each keyword becomes a row label in the result and is
# given as a (column, aggregation) pair.
pres.agg(Intelligence_count=('Intelligence', 'count'),
        Intelligence_size=('Intelligence', 'size')
        )

Unnamed: 0,Intelligence
Intelligence_count,44
Intelligence_size,44


In [13]:
# Summary statistics (count, mean, std, min, quartiles, max); the output below
# covers only the numeric columns.
pres.describe()

Unnamed: 0,Background,Imagination,Integrity,Intelligence,Luck,Willing_to_take_risks,Ability_to_compromise,Executive_ability,Leadership_ability,Communication_ability,...,Relations_with_Congress,Court_appointments,Handling_of_economy,Executive_appointments,Domestic_accomplishments,Foreign_policy_accomplishments,Avoid_crucial_mistakes,Experts'_view,Overall,Average_rank
count,44.0,44.0,44.0,44.0,44.0,44.0,44.0,44.0,44.0,44.0,...,44.0,44.0,44.0,44.0,44.0,44.0,44.0,44.0,44.0,44.0
mean,22.0,21.75,22.5,22.5,22.5,21.659091,22.0,22.227273,22.5,22.5,...,22.25,22.5,22.5,22.5,22.5,22.5,22.5,22.5,22.5,22.5
std,12.409674,12.519984,12.845233,12.845233,12.845233,11.892822,12.409674,12.500909,12.845233,12.845233,...,12.519984,12.845233,12.845233,12.845233,12.845233,12.845233,12.845233,12.845233,12.845233,12.845233
min,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
25%,11.75,11.0,11.75,11.75,11.75,11.75,11.75,11.75,11.75,11.75,...,11.75,11.75,11.75,11.75,11.75,11.75,11.75,11.75,11.75,11.75
50%,22.0,21.5,22.5,22.5,22.5,22.5,22.0,22.5,22.5,22.5,...,22.5,22.5,22.5,22.5,22.5,22.5,22.5,22.5,22.5,22.5
75%,32.25,32.25,33.25,33.25,33.25,31.25,32.25,32.25,33.25,33.25,...,33.0,33.25,33.25,33.25,33.25,33.25,33.25,33.25,33.25,33.25
max,43.0,43.0,44.0,44.0,44.0,41.0,43.0,43.0,44.0,44.0,...,43.0,44.0,44.0,44.0,44.0,44.0,44.0,44.0,44.0,44.0


In [15]:
# Per-row spread (max minus min) over the numeric columns, computed with two
# whole-frame reductions along the column axis.
(pres
    .select_dtypes('number')
    .pipe(lambda df_:df_.max(axis='columns')
        - df_.min(axis='columns'))
)

1     17
2     28
3     19
4     16
5     13
6     28
7     34
8     18
9     22
10    19
11    16
12    15
13     8
14     3
15     8
16    27
17    10
18    21
19    13
20    21
21    24
22    12
23     8
24    21
25    13
26    19
27    28
28    10
29    26
30    31
31    15
32    27
33    18
34    28
35    38
36    31
37    23
38    35
39    28
40    19
41    36
42    24
43    22
44    34
dtype: uint8

In [16]:
# Same per-row spread as the previous cell, but computed by calling a Python
# function once per row via apply(axis='columns').
(pres
    .select_dtypes('number')
    .apply(lambda row: row.max()-row.min(), axis='columns')
)

1     17
2     28
3     19
4     16
5     13
6     28
7     34
8     18
9     22
10    19
11    16
12    15
13     8
14     3
15     8
16    27
17    10
18    21
19    13
20    21
21    24
22    12
23     8
24    21
25    13
26    19
27    28
28    10
29    26
30    31
31    15
32    27
33    18
34    28
35    38
36    31
37    23
38    35
39    28
40    19
41    36
42    24
43    22
44    34
dtype: int8

In [17]:
# apply also accepts an aggregation name as a string; with the default axis
# this gives one total per numeric column.
pres.select_dtypes('number').apply('sum') # axis = 0

Background                        968
Imagination                       957
Integrity                         990
Intelligence                      990
Luck                              990
Willing_to_take_risks             953
Ability_to_compromise             968
Executive_ability                 978
Leadership_ability                990
Communication_ability             990
Overall_ability                   990
Party_leadership                  990
Relations_with_Congress           979
Court_appointments                990
Handling_of_economy               990
Executive_appointments            990
Domestic_accomplishments          990
Foreign_policy_accomplishments    990
Avoid_crucial_mistakes            990
Experts'_view                     990
Overall                           990
Average_rank                      990
dtype: int64

In [18]:
# Calling the aggregation method directly gives the same per-column totals as
# apply('sum') in the previous cell.
pres.select_dtypes('number').sum()

Background                        968
Imagination                       957
Integrity                         990
Intelligence                      990
Luck                              990
Willing_to_take_risks             953
Ability_to_compromise             968
Executive_ability                 978
Leadership_ability                990
Communication_ability             990
Overall_ability                   990
Party_leadership                  990
Relations_with_Congress           979
Court_appointments                990
Handling_of_economy               990
Executive_appointments            990
Domestic_accomplishments          990
Foreign_policy_accomplishments    990
Avoid_crucial_mistakes            990
Experts'_view                     990
Overall                           990
Average_rank                      990
dtype: int64

In [19]:
import io
# Inline CSV text used as a small billing fixture: one header line followed by
# seven records; an empty first field means no cancel_date for that record.
billing_data = \
'''cancel_date,period_start,start_date,end_date,rev,sum_payments
12/1/2019,1/1/2020,12/15/2019,5/15/2020,999,50
,1/1/2020,12/15/2019,5/15/2020,999,50
,1/1/2020,12/15/2019,5/15/2020,999,1950
1/20/2020,1/1/2020,12/15/2019,5/15/2020,499,0
,1/1/2020,12/24/2019,5/24/2020,699,100
,1/1/2020,11/29/2019,4/29/2020,799,250
,1/1/2020,1/15/2020,4/29/2020,799,250'''

In [20]:
# Read the inline CSV text through an in-memory file object and parse the four
# date columns as datetimes.
bill_df = pd.read_csv(
    io.StringIO(billing_data),
    parse_dates=['cancel_date', 'period_start', 'start_date', 'end_date'],
)

In [21]:
# Display the parsed frame; records without a cancel date show NaT.
bill_df

Unnamed: 0,cancel_date,period_start,start_date,end_date,rev,sum_payments
0,2019-12-01,2020-01-01,2019-12-15,2020-05-15,999,50
1,NaT,2020-01-01,2019-12-15,2020-05-15,999,50
2,NaT,2020-01-01,2019-12-15,2020-05-15,999,1950
3,2020-01-20,2020-01-01,2019-12-15,2020-05-15,499,0
4,NaT,2020-01-01,2019-12-24,2020-05-24,699,100
5,NaT,2020-01-01,2019-11-29,2020-04-29,799,250
6,NaT,2020-01-01,2020-01-15,2020-04-29,799,250


In [22]:
def calc_unbilled_rec(vals):
    """Compute the outstanding amount for a single billing record.

    Parameters
    ----------
    vals : iterable of six items
        Unpacked, in order, as ``cancel_date, period_start, start_date,
        end_date, rev, sum_payments`` (e.g. one row of ``bill_df``).

    Returns
    -------
    number or None
        ``None`` when the record was cancelled before ``period_start`` or
        when ``period_start`` does not fall strictly between ``start_date``
        and ``end_date``; otherwise ``rev - sum_payments`` if ``rev`` exceeds
        ``sum_payments``, else ``0``.
    """
    (cancel_date, period_start, start_date,
     end_date, rev, sum_payments) = vals

    # Cancelled before the period began: nothing to report.
    if cancel_date < period_start:
        return None

    # Only records that started before the period and end after it qualify.
    covers_period_start = start_date < period_start and end_date > period_start
    if not covers_period_start:
        return None

    # Overpayment is reported as zero rather than a negative amount.
    return rev - sum_payments if rev > sum_payments else 0

In [23]:
# Call calc_unbilled_rec once per row; rows for which it returns None show up
# as NaN in the resulting float Series.
bill_df.apply(calc_unbilled_rec, axis='columns')

0      NaN
1    949.0
2      0.0
3    499.0
4    599.0
5    549.0
6      NaN
dtype: float64

In [24]:
# Vectorized counterpart of calc_unbilled_rec: np.select checks the conditions
# in order and, per row, takes the value paired with the first True one.
pd.Series(
    np.select(
        [
            # 1: cancelled before the period started -> no amount
            (bill_df.cancel_date < bill_df.period_start),
            # 2: period start falls inside the record and revenue exceeds payments
            ((bill_df.start_date < bill_df.period_start) &
             (bill_df.end_date > bill_df.period_start) &
             (bill_df.rev > bill_df.sum_payments)),
            # 3: period start falls inside the record and payments cover revenue
            ((bill_df.start_date < bill_df.period_start) &
             (bill_df.end_date > bill_df.period_start) &
             (bill_df.rev <= bill_df.sum_payments)),
        ],
        [np.nan, bill_df.rev - bill_df.sum_payments, 0],  # values for 1, 2, 3
        np.nan),  # default when no condition matches
    # np.select returns a bare ndarray, so reattach the frame's index; the
    # result then lines up with bill_df (as the .apply version does) even when
    # the index is not the default 0..n-1.
    index=bill_df.index)

0      NaN
1    949.0
2      0.0
3    499.0
4    599.0
5    549.0
6      NaN
dtype: float64