# Water Usage Analysis with SAS

## Download and preview the XLS data

In [2]:
/* Fileref XLFILE points at the raw XLS workbook hosted on GitHub */
filename xlfile URL 'https://github.com/pestyld/data_projects/raw/master/water_usage_analysis/data/AMI_METER_READS-METER_INFO_HOURLY.xls';

/* V7 naming: imported column headers are converted to valid SAS variable names */
options validvarname=v7;

/* Read the workbook through the fileref into WORK.WATER_USAGE, overwriting any existing copy */
proc import
        out=work.water_usage
        datafile=xlfile
        dbms=xls
        replace;
run;

/* Show the first rows of the imported table */
proc print data=water_usage (obs=5);
run;

/* Show only the variable-attribute table (name, type, length, format) from PROC CONTENTS */
ods select variables;
proc contents data=water_usage;
run;

Obs,Service,Read_Date_Time,Usage__in_Gallons
1,Water,11/15/23 12:00 AM,0
2,Water,11/14/23 11:00 PM,0
3,Water,11/14/23 10:00 PM,0
4,Water,11/14/23 9:00 PM,0
5,Water,11/14/23 8:00 PM,0
6,Water,11/14/23 7:00 PM,0
7,Water,11/14/23 6:00 PM,0
8,Water,11/14/23 5:00 PM,0
9,Water,11/14/23 4:00 PM,0
10,Water,11/14/23 3:00 PM,0

Alphabetic List of Variables and Attributes,Alphabetic List of Variables and Attributes,Alphabetic List of Variables and Attributes,Alphabetic List of Variables and Attributes,Alphabetic List of Variables and Attributes,Alphabetic List of Variables and Attributes,Alphabetic List of Variables and Attributes
#,Variable,Type,Len,Format,Informat,Label
2,Read_Date_Time,Char,17,$17.,$17.,Read Date/Time
1,Service,Char,12,$12.,$12.,Service
3,Usage__in_Gallons,Char,17,$17.,$17.,Usage in Gallons


## Prepare Data - Create Final Hourly Data
- Modify the **Read_Date_Time** character column to a valid date value
- Modify **Usage__in_Gallons** char column: rename Usage in gallons column, then convert to numeric
- Create **Date** column
- Create **Time** column
- Create **Year** column
- Format all columns accordingly
- Add labels
- Drop unnecessary columns

In [4]:
/* Build WATER_CLEAN: one row per hourly meter read with numeric usage and
   numeric date/time columns derived from the character columns imported
   into WATER_USAGE. */
data water_clean;
    /* Rename the character usage column on input so the numeric replacement
       can take the clean name usage_in_gallons */
    set water_usage (rename=(Usage__in_Gallons = Usage_in_Gallons_char));

    /* Convert usage to numeric. The source column is $17, so read the full
       17-byte field; a narrower informat would only look at the leading bytes. */
    usage_in_gallons = input(Usage_in_Gallons_char, 17.);

    /* Convert the character timestamp (e.g. 11/15/23 12:00 AM) to a SAS datetime */
    read_date = input(Read_Date_Time, mdyampm23.);

    /* Derived date columns. Month and MonthYear hold the same SAS date value
       as Date; only the display format applied below differs. */
    Date = datepart(read_date);
    Time = timepart(read_date);
    Month = Date;
    Year = year(Date);
    MonthYear = Date;

    /* Display formats */
    format
        read_date mdyampm23.
        Date date9.
        Time timeampm.
        Month monname.
        MonthYear monyy7.
        usage_in_gallons comma15.
    ;

    /* Labels */
    label
        read_date = 'Read Date'
        usage_in_gallons = 'Total Gallons'
    ;

    /* Drop the raw character columns and the Service column */
    drop
        Service
        Read_Date_Time
        Usage_in_Gallons_char
    ;
run;

/* Preview the cleaned table */
proc print data=water_clean(obs=5);
run;


/* View column metadata */
ods select variables;
proc contents data=water_clean;
run;

Obs,usage_in_gallons,read_date,Date,Time,Month,Year,MonthYear
1,0,11/15/2023 12:00 AM,15NOV2023,12:00:00 AM,November,2023,NOV2023
2,0,11/14/2023 11:00 PM,14NOV2023,11:00:00 PM,November,2023,NOV2023
3,0,11/14/2023 10:00 PM,14NOV2023,10:00:00 PM,November,2023,NOV2023
4,0,11/14/2023 9:00 PM,14NOV2023,9:00:00 PM,November,2023,NOV2023
5,0,11/14/2023 8:00 PM,14NOV2023,8:00:00 PM,November,2023,NOV2023

Alphabetic List of Variables and Attributes,Alphabetic List of Variables and Attributes,Alphabetic List of Variables and Attributes,Alphabetic List of Variables and Attributes,Alphabetic List of Variables and Attributes,Alphabetic List of Variables and Attributes
#,Variable,Type,Len,Format,Label
3,Date,Num,8,DATE9.,
5,Month,Num,8,MONNAME.,
7,MonthYear,Num,8,MONYY7.,
4,Time,Num,8,TIMEAMPM.,
6,Year,Num,8,,
2,read_date,Num,8,MDYAMPM23.,Read Date
1,usage_in_gallons,Num,8,COMMA15.,Total Gallons


## Explore Data
Simply view max, mean and min of the data to see the high and low values, then see the start and end dates.

### Overall min, mean and max of hourly data

In [5]:
/* One-row summary of the hourly data: max/mean/min gallons per read and the
   first and last read dates. NOPRINT suppresses the default report; results
   go to DATA_SUMMARY (the automatic _TYPE_ column is dropped, _FREQ_ is kept
   as the observation count). */
proc means data=water_clean noprint;
    var usage_in_gallons date;
    output out=data_summary(drop=_TYPE_)
        max(usage_in_gallons)=MaxGal
        mean(usage_in_gallons)=MeanGal
        min(usage_in_gallons)=MinGal   /* was MinGal4: stray digit in the output name */
        max(date)=MaxDate
        min(date)=MinDate
        ;
run;

title "Total obs, Max, Mean and Min usage and date by Hour";
proc print data=data_summary;
run;
title;

Obs,_FREQ_,MaxGal,MeanGal,MinGal4,MaxDate,MinDate
1,9796,290,5,0,15NOV2023,01OCT2022


## Analyzing Water Usage Monthly

In [237]:
/* Quick look at the first five rows of the cleaned hourly table */
proc print data=water_clean (obs=5);
run;

Obs,usage_in_gallons,read_date,Date,Time,Month,Year,MonthYear
1,0,11/15/2023 12:00 AM,15NOV2023,12:00:00 AM,November,2023,NOV2023
2,0,11/14/2023 11:00 PM,14NOV2023,11:00:00 PM,November,2023,NOV2023
3,0,11/14/2023 10:00 PM,14NOV2023,10:00:00 PM,November,2023,NOV2023
4,0,11/14/2023 9:00 PM,14NOV2023,9:00:00 PM,November,2023,NOV2023
5,0,11/14/2023 8:00 PM,14NOV2023,8:00:00 PM,November,2023,NOV2023


## Data Preparation for Visualization
- Use MEANS to summarize the data by month and year
- Use the DATA step to:
    - Identify when the meter broke (19AUG2023)
    - **num_days_in_month** - Find the number of days in each month
    - **avg_gallons_per_month** - Calculate the average gallons used per day within each month (monthly total / number of days in the month)

In [238]:
/* Monthly summary: capture the PROC MEANS Summary table (N and SUM of gallons
   for each MonthYear group) as data set MONTHLY_SUMMARY */
ods output Summary=monthly_summary;
proc means data=water_clean n sum;
    class MonthYear;
    var usage_in_gallons;
run;


/* Enrich the monthly summary with meter status, days in month, a per-day
   average, and separate plotting columns for working vs. broken months */
data monthly_summary;
    length MeterStatus $7;
    set monthly_summary;

    /* Months from August 2023 onward are flagged as broken-meter months */
    if MonthYear >= '01AUG2023'd then MeterStatus = 'Broken';
    else MeterStatus = 'Working';

    /* Number of days in the month: day-of-month of the month's last date */
    num_days_in_month = day(intnx('month', MonthYear, 0, 'end'));

    /* Monthly total divided by days in the month, rounded to a whole gallon */
    avg_gallons_per_month = round(usage_in_gallons_Sum / num_days_in_month);

    /* Copy the total and the average into status-specific columns; the
       columns belonging to the other status stay missing on this row */
    select (MeterStatus);
        when ('Broken') do;
            usage_in_gallons_broken     = usage_in_gallons_Sum;
            usage_in_gallons_avg_broken = avg_gallons_per_month;
        end;
        otherwise do;
            usage_in_gallons_sum_labels = usage_in_gallons_Sum;
            usage_in_gallons_avg_labels = avg_gallons_per_month;
        end;
    end;

    /* Display formats */
    format usage_in_gallons_Sum usage_in_gallons_sum_labels comma16.;
    format MonthYear monyy7.;
run;

/* Show the finished monthly table */
proc print data=monthly_summary;
run;

Analysis Variable : usage_in_gallons Total Gallons,Analysis Variable : usage_in_gallons Total Gallons,Analysis Variable : usage_in_gallons Total Gallons,Analysis Variable : usage_in_gallons Total Gallons
MonthYear,N Obs,N,Sum
OCT22,743,743,5720.0
NOV22,720,720,5000.0
DEC22,744,744,3840.0
JAN23,744,744,4550.0
FEB23,672,672,3890.0
MAR23,743,743,4050.0
APR23,720,720,5280.0
MAY23,744,744,3820.0
JUN23,720,720,3420.0
JUL23,744,744,4650.0

Obs,MeterStatus,MonthYear,NObs,usage_in_gallons_N,usage_in_gallons_Sum,num_days_in_month,avg_gallons_per_month,usage_in_gallons_broken,usage_in_gallons_avg_broken,usage_in_gallons_sum_labels,usage_in_gallons_avg_labels
1,Working,OCT2022,743,743,5720,31,185,.,.,5720,185
2,Working,NOV2022,720,720,5000,30,167,.,.,5000,167
3,Working,DEC2022,744,744,3840,31,124,.,.,3840,124
4,Working,JAN2023,744,744,4550,31,147,.,.,4550,147
5,Working,FEB2023,672,672,3890,28,139,.,.,3890,139
6,Working,MAR2023,743,743,4050,31,131,.,.,4050,131
7,Working,APR2023,720,720,5280,30,176,.,.,5280,176
8,Working,MAY2023,744,744,3820,31,123,.,.,3820,123
9,Working,JUN2023,720,720,3420,30,114,.,.,3420,114
10,Working,JUL2023,744,744,4650,31,150,.,.,4650,150


#### Default Visual

In [320]:
/* Baseline chart with default SGPLOT styling: monthly total and the
   avg_gallons_per_month column drawn as two overlaid line series */
title "DEFAULT GRAPH: Monthly Water Usage from October 2022 to November 2023";
proc sgplot data=monthly_summary;
    vline MonthYear /
        response=usage_in_gallons_Sum;
    vline MonthYear /
        response=avg_gallons_per_month;
run;

/* Clear the title so it does not carry over to later output */
title;

#### Final Visual

Create macro variables for specific settings

In [287]:
/* Set default visualization colors and settings */
%let textColor = CX3D444F;
%let myBlue = CX0766D1;
%let myRed = CXF24949;
%let myLightRed = CXFF9999;
%let lightGray = CXC1C7C9;
%let labelSize = 12pt;


/* Set the path to your folder (REQUIRED).
   The macro variable PATH must already be defined before this line runs. */
%let outpath = &path\water_usage_analysis;

/* Create max y value for the graph by increasing the max monthly total by 25%
   and rounding to the nearest 1,000. Result is stored in macro variable maxYValue. */
proc sql noprint;
    select round(max(usage_in_gallons_Sum) * 1.25, 1000)
        into :maxYValue trimmed
        from monthly_summary;   /* terminating semicolon was missing, so QUIT was parsed as a table alias */
quit;
%put &=maxYValue;


/* Create a macro variable holding the August 2023 total; it positions the
   annotation line. Relies on the Aug 2023 row carrying the date value 01Aug2023. */
proc sql noprint;
    select usage_in_gallons_Sum format=8.
        into :aug2023_total trimmed
        from monthly_summary
        where MonthYear='01Aug2023'd;
quit;
%put &=aug2023_total;


579                                                        The SAS System                          09:09 Thursday, November 23, 2023

13288      ods listing close;ods html5 (id=saspy_internal) file=_tomods1 options(bitmap_mode='inline') device=svg style=HTMLBlue;
13288    ! ods graphics on / outputfmt=png;
[38;5;21mNOTE: Writing HTML5(SASPY_INTERNAL) Body file: _TOMODS1[0m
13289      
13290      /* Set default visualization colors and setting */
13291      %let textColor = CX3D444F;
13292      %let myBlue = CX0766D1;
13293      %let myRed = CXF24949;
13294      %let myLightRed = CXFF9999;
13295      %let lightGray = CXC1C7C9;
13296      %let labelSize = 12pt;
13297      
13298      
13299      /* Set the path to your folder (REQUIRED) */
13300      %let outpath = &path\water_usage_analysis;
13301      
13302      /* Create max y value for the graph by increasing the max value by %25  and rounding to the nearest 1,000 */
13303      proc sql noprint;
13304          select round(max(us

Create my annotation table to add annotations to the visual.

In [321]:
/* Import the annotation macros (%sgtext, %sgline, %sgrectangle used below) */
%SGANNO

/* Create annotation data set for the graph.
   Each macro call below adds one observation to myAnno; the data set is
   later passed to PROC SGPLOT through the SGANNO= option.
   Requires macro variables lightGray, myRed, aug2023_total and maxYValue
   to be defined before this step runs. */
data myAnno;
    /* 2022 and 2023 labels: year text anchored at Oct 2022 and Jan 2023, at y=2 in data units */
    %sgtext(drawspace='datavalue',x1='01Oct2022'd, y1=2, label="2022", width = 10, 
            justify="left", textcolor = "&lightGray", textSize=16, anchor='bottomleft', discreteoffset=-.5);
    %sgtext(drawspace='datavalue',x1='01Jan2023'd, y1=2, label="2023", width = 10, 
            justify="left", textcolor = "&lightGray", textSize=16, anchor='bottomleft', discreteoffset=0);
    
    /* Bad water meter text and shading */
    /* Horizontal dashed red line at the August 2023 total, from Aug 2023 to Sep 2023 */
    %sgline(drawspace="datavalue", linepattern='shortdash', lineColor="&myRed",
            x1='01Aug2023'd, x2='01Sep2023'd, y1=&aug2023_total, y2=&aug2023_total);
    /* Vertical line at Sep 2023 from the August total up to y=6000 (hard-coded).
       No line attributes are given here; NOTE(review): it appears to reuse the
       attributes of the previous call because no reset= is specified - confirm. */
    %sgline(drawspace="datavalue",
            x1='01Sep2023'd, x2='01Sep2023'd, y1=&aug2023_total, y2=6000);
    /* Callout text box, white bold text on a red fill, placed 500 below the y-axis maximum */
    %sgtext(drawspace='datavalue', x1='01Aug2023'd, y1=&maxYValue-500, label="Our home water meter broke on August 19, 2023, and has not been repaired.", width = 25, justify="center", 
            textcolor = "white", textSize=11, anchor='topleft', discreteoffset=+.15,
            fillColor="&myRed", textweight="bold", reset='all');
    /* Mostly transparent red rectangle starting at Aug 2023, from y=0 up to the y-axis maximum.
       NOTE(review): width is given as the date value '01Oct2023'd in data units, not as a
       span between two dates - confirm this is the intended way to cover the broken months. */
    %sgrectangle(drawspace='datavalue', 
                 x1='01Aug2023'd , widthunit='data', width='01Oct2023'd,
                 y1=0, heightunit='data', height=&maxYValue,
                 display = 'fill', filltransparency=.9, fillcolor="&myRed", anchor='bottomleft',reset='all');
run;

/* View the data */
proc print data=myAnno;
run;

Obs,ANCHOR,DISPLAY,DRAWSPACE,FILLCOLOR,FUNCTION,HEIGHTUNIT,JUSTIFY,LABEL,LINECOLOR,LINEPATTERN,TEXTCOLOR,TEXTWEIGHT,WIDTHUNIT,DISCRETEOFFSET,TEXTSIZE,WIDTH,X1,Y1,X2,Y2,FILLTRANSPARENCY,HEIGHT
1,bottomleft,,datavalue,,TEXT,,left,2022,,,CXC1C7C9,,,-0.50,16,10,22919,2,.,.,.,.
2,bottomleft,,datavalue,,TEXT,,left,2023,,,CXC1C7C9,,,0.00,16,10,23011,2,.,.,.,.
3,bottomleft,,datavalue,,LINE,,left,2023,CXF24949,shortdash,CXC1C7C9,,,0.00,16,10,23223,2550,23254,2550,.,.
4,bottomleft,,datavalue,,LINE,,left,2023,CXF24949,shortdash,CXC1C7C9,,,0.00,16,10,23254,2550,23254,6000,.,.
5,topleft,,datavalue,CXF24949,TEXT,,center,"Our home water meter broke on August 19, 2023, and has not been repaired.",,,white,bold,,0.15,11,25,23223,6500,.,.,.,.
6,bottomleft,fill,datavalue,CXF24949,RECTANGLE,data,,,,,,,data,.,.,23284,23223,0,.,.,0.9,7000


Create final visualization

In [322]:
/* Save the visual as a PNG file and set size.
   Requires macro variables outpath, textColor, myBlue, myRed, labelSize and
   maxYValue, plus data sets monthly_summary and myAnno, to exist already. */
ods listing gpath="&outpath";
ods graphics / imagename="Water_Usage_Monthly" imagefmt=png width = 9in height = 4in;

/* Titles and footnotes.
   "Avg" is the monthly total divided by the number of days in the month,
   so the footnote describes it as a per-day average. */
title justify = left color = &textColor height=14pt "Flowing Through Time: A Visual Journey of My Family's Monthly Water Usage (Gallons)";
title2 justify = left color = &textColor height=12pt  "October 2022 - November 2023";
footnote justify = left color = &textColor height=8pt italic "Total is total usage per month, Avg is average usage per day in each month";
footnote2 justify = left color = &textColor height=8pt italic "Created on November 11, 2023";

/* Visualization */
proc sgplot data = monthly_summary
            noborder NOCYCLEATTRS
            nowall
            sganno = myAnno
            noautolegend;
    /* Refline for new year */
    refline 'Jan2023' / axis=x labelpos=min labelloc=inside lineattrs=(color=lightgray);
    
    /* Sum gallons lines (working and broken) */
    /* WORKING TOTAL LINE: drawn for every month; data labels come from the
       working-only column, so broken months get no label */
    vline MonthYear / 
        response=usage_in_gallons_Sum
        lineattrs=(thickness=3 color=&myBlue)
        dataskin=none 
        markers markerattrs=(symbol=CircleFilled size=10 color=&myBlue)
        datalabel=usage_in_gallons_sum_labels datalabelattrs=(color=&textColor)
        curvelabel='Total' curvelabelpos=min curvelabelattrs=(color=&myBlue)
    ;
    /* BROKEN TOTAL LINE: overlays the blue series on broken months; markers are
       slightly larger so they cover the blue markers underneath.
       Marker color now uses &myRed to match the line color (was a literal red). */
    vline MonthYear /
        response=usage_in_gallons_broken
        lineattrs=(thickness=3 color=&myRed)
        markers markerattrs=(symbol=CircleFilled size=10.5 color=&myRed)
        dataskin=none
    ;
    /* Avg gallons lines (working and broken), plotted against the Y2 axis */
    /* WORKING AVG LINE */
    vline MonthYear / 
        response=avg_gallons_per_month 
        y2axis
        lineattrs=(color=&myBlue)
        datalabel=usage_in_gallons_avg_labels datalabelattrs=(color=&textColor)
        dataskin=none
        markers markerattrs=(color=&myBlue symbol=CircleFilled size=6)
        curvelabel='Avg' curvelabelpos=min curvelabelattrs=(color=&myBlue)
    ;
    /* BROKEN AVG LINE: line color now uses &myRed to match its markers (was a literal red) */
    vline MonthYear /
        response=usage_in_gallons_avg_broken
        y2axis
        lineattrs=(color=&myRed)
        dataskin=none
        markers markerattrs=(color=&myRed symbol=CircleFilled size=6.5)
    ;
    /* Axis attributes */
    xaxis display=(NOLABEL NOTICKS)
          valueattrs=(color=&textColor size=10pt);
    /* Both Y axes are hidden; values are read from the data labels instead */
    yaxis display=NONE 
          offsetmin=0
          max=&maxYValue
          label='' 
          labelattrs=(color=&textColor size=&labelSize);
    
    /* Y2 maximum is hard-coded at 800 */
    y2axis display=NONE 
          offsetmin=0
          max=800
          labelattrs=(color=&textColor size=&labelSize);
run;

/* Clear all: restore ODS Graphics defaults and remove titles and footnotes */
ods graphics / reset;
title;
footnote;