In [1]:
import pandas as pd
import numpy as np
import math
from functools import reduce

In [2]:
def _read_indexed(path):
    """Read a CSV file, using its first column as the row index."""
    return pd.read_csv(path, index_col=0)


dat = _read_indexed('survey.csv')
# Carbon footprint by activity and fuel source, ordered by index label
CF = _read_indexed('CF.csv').sort_index()
# Activity groups
groups = _read_indexed('Activity_groups.csv')
# Individual quality of life
QLI = _read_indexed('QLI.csv')
# Individual consumption
Cmp = _read_indexed('Cmp.csv')
# CF per unit consumption by individual and activity
CF_unit = _read_indexed('CF_unit.csv')

In [3]:
# List of activities and corresponding group, plus the number of missing QLI
# values recorded for each activity.
# Rebinding instead of sorting in place keeps the cell idempotent. `groups` must
# stay sorted by activity because Person.getFrame reads `groups.Group` by position.
groups = groups.sort_values('Activity')
# Vectorised NaN count per QLI column. Unlike a per-element math.isnan() loop,
# this does not raise TypeError when a cell holds a non-float value.
nans = QLI.isna().sum().rename_axis('Activity').reset_index(name='QLI_Nans')
groupSummary = groups.merge(nans, on='Activity')
#groupSummary.sort_values(['QLI_Nans', 'Group'], ascending=True)

In [8]:
# Clean up activity names.
# The new names are paired with QLI's current columns purely by position, so
# `newActNames` must list them in the same order as those columns.
# NOTE: `oldActs` is read from QLI at run time, so re-running this cell after the
# rename maps the new names onto themselves.
oldActs = list(QLI.columns)
newActNames = ['heat_cool', 'heat_hot', 'kitchen_appliance_sm', 'TV_computer_use', 'AC_use', 'heat_pump_use', 'travel_plane_lg', 'travel_plane_sm', 'compost_bags', 'garbage_bags', 'recycling_bags', 'bath', 'drive_HOV', 'drive_2', 'drive_self', 'hzd_disposals', 'lg_item_disposals', 'shower_long', 'shower_short', 'public_trans_trips', 'use_oven', 'use_dryer', 'use_washer', 'use_cooking_range', 'use_dishwasher', 'use_self_clean_e_oven', 'wash_up']
if len(oldActs) != len(newActNames):
    # zip() would silently drop the unmatched names and leave columns unrenamed.
    raise ValueError(
        'Expected {} activity columns in QLI, found {}'.format(len(newActNames), len(oldActs))
    )
activities = dict(zip(oldActs, newActNames))

QLI = QLI.rename(columns=activities)
Cmp = Cmp.rename(columns=activities)
CF_unit = CF_unit.rename(columns=activities)

# CF holds activities on its index: order by the original labels, then relabel.
CF = CF.sort_index().rename(index=activities)

## Individual Carbon Footprint

In [9]:
# Element-wise product of consumption and CF per unit of consumption.
IndCF = Cmp.mul(CF_unit)
IndCF.head(2)

Unnamed: 0_level_0,heat_cool,heat_hot,kitchen_appliance_sm,TV_computer_use,AC_use,heat_pump_use,travel_plane_lg,travel_plane_sm,compost_bags,garbage_bags,...,shower_long,shower_short,public_trans_trips,use_oven,use_dryer,use_washer,use_cooking_range,use_dishwasher,use_self_clean_e_oven,wash_up
Indnum,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.00872,0.000872,0.0,0.000128,0.08701,0.0,0.491713,0.0,-0.1257,1.0894,...,0.00999,0.001077,0.00414,0.001404,0.000381,0.000195,0.0,0.0,0.000566,0.001848
2,0.001744,0.003052,0.0,0.000126,0.065257,0.0,0.451796,0.0,-0.0,0.7123,...,0.004329,0.001508,0.003312,0.0012,0.000488,0.00078,0.0,0.0,0.000566,0.000176


In [10]:
# Summarise CF units by activity; use this to see the room for improvement
# within each activity.
# Max/Min come from the rows of CF, Mean from the columns of CF_unit. Values are
# attached by position to the activity labels taken from CF_unit.
CF_unit_summary = pd.DataFrame(
    {
        'Max': list(CF.max(axis=1)),
        'Min': list(CF.min(axis=1)),
        'Mean': list(CF_unit.mean()),
    },
    index=CF_unit.max().index,
)

In [48]:
# Preview the first two activities of the summary.
CF_unit_summary.head(2)

Unnamed: 0,Max,Min,Mean
heat_cool,0.000923,0.000872,0.000873
heat_hot,0.00065,0.000436,0.000438


In [51]:
# Summarise the best/worst CF sources by activity; use this to recommend the
# best sources.
# For each row of CF: the column label holding the largest value ('Worst') and
# the one holding the smallest value ('Best'), attached by position to the
# activity labels taken from CF_unit.
CF_sources_summary = pd.DataFrame(
    {
        'Worst': list(CF.idxmax(axis=1)),
        'Best': list(CF.idxmin(axis=1)),
    },
    index=CF_unit.max().index,
)

In [53]:
# Preview the first two activities of the best/worst source table.
CF_sources_summary.head(2)

Unnamed: 0,Worst,Best
heat_cool,peak_e,NG
heat_hot,peak_e,NG


In [55]:
# Column totals of IndCF, largest first; show the top five.
IndCF.sum().sort_values(ascending=False).head(5)

garbage_bags         940.361700
travel_plane_lg      538.527049
drive_self           197.631578
AC_use                96.856519
lg_item_disposals     21.311000
dtype: float64

### All individuals - CF Improvement Potential

In [17]:
# Total CF per individual (row sums of IndCF)
IndCFTot = IndCF.sum(axis=1)
# Per individual/activity: CF unit minus the per-activity Min from CF_unit_summary
CFu_Improve = CF_unit.sub(CF_unit_summary.Min)
# Per individual/activity: improvable CF = unit improvement times consumption
CF_Improve = CFu_Improve.mul(Cmp)
# Per individual: total improvable CF
IndCFTot_Improve = CF_Improve.sum(axis=1)

# Individual summary of CF and improvement potential.
# Totals are attached by position to the row labels of Cmp.
IndCF_sum = pd.DataFrame(
    {
        'Current': list(IndCFTot),
        'Improve': list(IndCFTot_Improve),
    },
    index=Cmp.index,
)
IndCF_sum['Possible'] = IndCF_sum['Current'] - IndCF_sum['Improve']
IndCF_sum['Pct_change'] = IndCF_sum['Improve'] / IndCF_sum['Current']
IndCF_sum.head(4)

Unnamed: 0_level_0,Current,Improve,Possible,Pct_change
Indnum,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,1.882629,0.263196,1.619433,0.139802
2,1.413601,0.172593,1.241008,0.122095
3,1.649389,0.17593,1.473459,0.106664
4,2.043124,0.204667,1.838457,0.100174


In [35]:
# Columns of `dat` from position 6 onward are used as the source names.
sourceNames = dat.columns[6:]


def _activity_table(source):
    """Pivot one source column of `dat` into an Indnum x Activity table with cleaned names."""
    wide = dat.pivot(index='Indnum', columns='Activity', values=source)
    return wide.rename(columns=activities)


# One pivoted table per source, in the same order as sourceNames.
sources = list(map(_activity_table, sourceNames))

In [44]:
# Show the source column labels sliced from `dat`.
sourceNames

Index(['WH_solar', 'WH_gas', 'WH_peak_e', 'WH_OP_e', 'gas', 'NG', 'hybrid',
       'peak_e', 'OP_e', 'jet_fuel', 'waste_mgmt'],
      dtype='object')

## Organize data into people classes

Here I define a class called `Person`, which is created from a single parameter: the individual number. To scale this up, you would need to provide each individual's survey results for quality-of-life importance, individual consumption, and the sources used for each activity. The methods include:
* displaySummary: Prints a summary of the current carbon footprint, the possible improvement, and which sources to change to.
* viewFrame: Returns a data table of the user's QLI, Cmp, carbon footprint, and room for improvement for each activity.
* getSourceRecs: Returns a data table of the activities where the carbon footprint can be improved by changing to the listed sources.

In [14]:
from collections import namedtuple

# One record type per per-activity measure; the fields are the cleaned activity names.
Consumption = namedtuple('Consumption', newActNames)
Importance = namedtuple('Importance', newActNames)
# The typename now matches the variable. It used to be 'Footprint', so the repr of a
# unit footprint was indistinguishable from that of a total footprint.
Unit_Footprint = namedtuple('Unit_Footprint', newActNames)
Footprint = namedtuple('Footprint', newActNames)
# Field names of the per-individual summary record.
summary_elems = ['CF_Current', 'Possible_Improvement', 'CF_Possible', 'Pct_Change_Possible']
Summary = namedtuple('Summary', summary_elems)

In [140]:
class Person:
    """Quality-of-life, consumption and carbon-footprint data for one individual.

    Instances read the notebook-level tables ``QLI``, ``Cmp``, ``CF_unit``,
    ``IndCF``, ``IndCF_sum``, ``groups``, ``CF_unit_summary`` and
    ``CF_sources_summary`` (and the namedtuple types built from ``newActNames``),
    so those must be defined before a ``Person`` is created.

    Attributes:
        num: The individual number the instance was created with.
        qli: ``Importance`` record of the individual's QLI row.
        cmp: ``Consumption`` record of the individual's Cmp row.
        cf_unit: ``Unit_Footprint`` record of the individual's CF_unit row.
        cf: ``Footprint`` record of the individual's IndCF row.
        df: Per-activity DataFrame built by ``getFrame``.
        summary: ``Summary`` record built by ``populateSummary``.
        recs: Recommendation table from the last ``getSourceRecs`` call, or None.
    """

    def __init__(self, Indnum):
        """Collect the rows labelled ``Indnum`` from the notebook-level tables."""
        self.num = Indnum
        self.qli = Importance(*QLI.loc[Indnum])
        self.cmp = Consumption(*Cmp.loc[Indnum])
        self.cf_unit = Unit_Footprint(*CF_unit.loc[Indnum])
        self.cf = Footprint(*IndCF.loc[Indnum])
        self.df = None
        self.recs = None
        self.getFrame()
        self.summary = self.populateSummary()

    def populateSummary(self):
        """Return a ``Summary`` of this individual's row in ``IndCF_sum``.

        Values are rounded to 4 decimals; the percentage change is rendered as a
        string ending in '%'.
        """
        row = IndCF_sum.loc[self.num]
        pct = round(float(row['Pct_change']), 4)
        return Summary(
            CF_Current=round(float(row['Current']), 4),
            Possible_Improvement=round(float(row['Improve']), 4),
            CF_Possible=round(float(row['Possible']), 4),
            # Round again after scaling so float artefacts such as
            # 13.979999999999999 never reach the printed string.
            Pct_Change_Possible=str(round(pct * 100, 2)) + '%',
        )

    def displaySummary(self):
        """Print the summary fields, then the best source for each recommended activity."""
        for field, value in zip(self.summary._fields, self.summary):
            print(field + ":", value)
        print("\nSources changes:")
        for activity, rec in self.getSourceRecs().iterrows():
            # Look the column up by label rather than by position in the row.
            print(activity + ':', rec['Best'])

    def viewFrame(self):
        """Return the per-activity DataFrame, building it first if necessary."""
        # `is None` is required: `== None` on a DataFrame compares element-wise
        # and raises ValueError when used as a condition.
        if self.df is None:
            self.getFrame()
        return self.df

    def getFrame(self):
        """Build, store on ``self.df`` and return the per-activity DataFrame.

        Columns: QLI, Cmp, CF_Unit, CF, grp, CFu_Improve, CF_Improve and
        CF_Improve_Pct. Any NaN in the finished table is replaced by 0.
        """
        frame = pd.DataFrame(list(self.qli), index=newActNames, columns=['QLI'])
        frame['Cmp'] = list(self.cmp)
        frame['CF_Unit'] = list(self.cf_unit)
        frame['CF'] = list(self.cf)
        # Attached by position: assumes `groups` rows are in the same order as
        # newActNames -- TODO confirm.
        frame['grp'] = list(groups.Group)
        # How much can the CF unit be improved
        frame['CFu_Improve'] = frame.CF_Unit - CF_unit_summary.Min
        frame['CF_Improve'] = frame.CFu_Improve * frame.Cmp
        frame['CF_Improve_Pct'] = frame.CF_Improve / frame.CF  # Change / Current CF
        self.df = frame.fillna(0)
        return self.df

    def getSourceRecs(self):
        """Build, store on ``self.recs`` and return the source recommendations.

        Keeps activities whose CF_Improve exceeds 0.001, ordered from largest to
        smallest CF_Improve, with the columns grp, Cmp, Current_Source, CF,
        CF_Unit, CFu_Improve, CF_Improve, CF_Improve_Pct and Best.
        """
        frame = self.viewFrame()
        self.sources1 = frame.loc[frame.CF_Improve > 0, :]
        improvement_cols = ['CF', 'CF_Unit', 'CFu_Improve', 'CF_Improve', 'CF_Improve_Pct']
        recs = (
            self.sources1[['grp', 'Cmp']]
            # TODO Adjust this// then impute this if none listed
            .join(CF_sources_summary.Worst.rename('Current_Source'))
            .join(self.sources1[improvement_cols])
            .join(CF_sources_summary.Best)
        )
        recs = recs.sort_values(by='CF_Improve', ascending=False)
        # Build the mask from the sorted frame so it lines up with the rows it filters.
        self.recs = recs[recs.CF_Improve > 0.001]
        return self.recs

    def viewGroup(self, group):
        """Return a copy of the rows whose ``grp`` equals ``group``, without all-NaN columns."""
        frame = self.viewFrame()
        selected = frame[frame.grp == group].copy()
        return selected.dropna(axis=1, how='all')

    # TODO: viewPlot(self) is not implemented yet.

In [141]:
# Build the record for individual 1 and preview the first two activities of its table.
p1 = Person(1)
df = p1.viewFrame()
df.head(2)

Unnamed: 0,QLI,Cmp,CF_Unit,CF,grp,CFu_Improve,CF_Improve,CF_Improve_Pct
heat_cool,85.0,10.0,0.000872,0.00872,1,0.0,0.0,0.0
heat_hot,88.0,2.0,0.000436,0.000872,1,0.0,0.0,0.0


In [142]:
# Print the summary and the recommended source changes for individual 1.
p1.displaySummary()

CF_Current: 1.8826
Possible_Improvement: 0.2632
CF_Possible: 1.6194
Pct_Change_Possible: 13.98%

Sources changes:
drive_self: peak_e
AC_use: NG
drive_2: peak_e
shower_long: WH_solar
drive_HOV: peak_e
wash_up: WH_solar
shower_short: WH_solar




In [143]:
# Show the recommendation table returned for individual 1.
p1.getSourceRecs()



Unnamed: 0,grp,Cmp,Current_Source,CF,CF_Unit,CFu_Improve,CF_Improve,CF_Improve_Pct,Best
drive_self,5,443.0,gas,0.244093,0.000551,0.000359,0.159037,0.651543,peak_e
AC_use,1,20.0,peak_e,0.08701,0.00435,0.003752,0.07505,0.862545,NG
drive_2,5,38.0,gas,0.021052,0.000554,0.000355,0.01349,0.640794,peak_e
shower_long,2,30.0,WH_peak_e,0.00999,0.000333,0.000316,0.00948,0.948949,WH_solar
drive_HOV,5,10.0,gas,0.00354,0.000354,0.000322,0.00322,0.909605,peak_e
wash_up,2,44.0,WH_peak_e,0.001848,4.2e-05,3.8e-05,0.001672,0.904762,WH_solar
shower_short,2,5.0,WH_peak_e,0.001077,0.000215,0.000203,0.001017,0.944316,WH_solar
