# Customer Analysis Project
***

###  Creating the Dataset

As part of the project, I'd like to generate a random dataset as opposed to utilizing a pre-developed one, and to test data manipulation and generation skills. Towards this end, we are using a random name list of 500 names, then inputting the individual columns utilizing random functions within Panda. 

In [8]:
# Importing packages

import pandas as pd
import matplotlib.pyplot as plt
import datetime as dt
import seaborn as sns
import numpy as np

In [5]:
# Load the 500 random names stored in directory CSV file to customer dataframe.

customer = pd.read_csv('randomnames.csv')
customer

Unnamed: 0,"First, Last Name"
0,Judith Compton
1,Mamie Black
2,Jewel Russell
3,Jeanine Waller
4,Lilia Holland
...,...
495,Pierre Hickman
496,Dave Roberson
497,Susan Watts
498,Ezekiel White


In [7]:
# Split First and Last Name columns for usability. 

customer[['First Name','Last Name']] = customer['First, Last Name'].str.split(' ',expand=True)
customer

Unnamed: 0,"First, Last Name",First Name,Last Name
0,Judith Compton,Judith,Compton
1,Mamie Black,Mamie,Black
2,Jewel Russell,Jewel,Russell
3,Jeanine Waller,Jeanine,Waller
4,Lilia Holland,Lilia,Holland
...,...,...,...
495,Pierre Hickman,Pierre,Hickman
496,Dave Roberson,Dave,Roberson
497,Susan Watts,Susan,Watts
498,Ezekiel White,Ezekiel,White


In [10]:
# Generate random purchase values using numpy range. 

customer['Purchase_Amount'] = np.random.randint(0,5000, size=len(customer))
customer

Unnamed: 0,"First, Last Name",First Name,Last Name,Purchase_Amount
0,Judith Compton,Judith,Compton,3188
1,Mamie Black,Mamie,Black,3339
2,Jewel Russell,Jewel,Russell,1026
3,Jeanine Waller,Jeanine,Waller,2582
4,Lilia Holland,Lilia,Holland,291
...,...,...,...,...
495,Pierre Hickman,Pierre,Hickman,4778
496,Dave Roberson,Dave,Roberson,4185
497,Susan Watts,Susan,Watts,1614
498,Ezekiel White,Ezekiel,White,2176


In [11]:
# Generate purchase locations (at state level) using list of states. 

states = ["Alabama", "Alaska", "American Samoa", "Arizona", "Arkansas", "California", "Colorado", "Connecticut", "Delaware", "District of Columbia", "Florida", "Georgia", "Guam", "Hawaii", "Idaho", "Illinois", "Indiana", "Iowa", "Kansas", "Kentucky", "Louisiana", "Maine", "Maryland", "Massachusetts", "Michigan", "Minnesota", "Minor Outlying Islands", "Mississippi", "Missouri", "Montana", "Nebraska", "Nevada", "New Hampshire", "New Jersey", "New Mexico", "New York", "North Carolina", "North Dakota", "Northern Mariana Islands", "Ohio", "Oklahoma", "Oregon", "Pennsylvania", "Puerto Rico", "Rhode Island", "South Carolina", "South Dakota", "Tennessee", "Texas", "U.S. Virgin Islands", "Utah", "Vermont", "Virginia", "Washington", "West Virginia", "Wisconsin", "Wyoming"]

rng = np.random.default_rng()
customer['State'] = rng.choice(states, len(customer))
customer['State']

0              North Carolina
1      Minor Outlying Islands
2         U.S. Virgin Islands
3                North Dakota
4              South Carolina
                ...          
495              South Dakota
496                   Montana
497                  Nebraska
498                   Montana
499                     Texas
Name: State, Length: 500, dtype: object

In [12]:
customer

Unnamed: 0,"First, Last Name",First Name,Last Name,Purchase_Amount,State
0,Judith Compton,Judith,Compton,3188,North Carolina
1,Mamie Black,Mamie,Black,3339,Minor Outlying Islands
2,Jewel Russell,Jewel,Russell,1026,U.S. Virgin Islands
3,Jeanine Waller,Jeanine,Waller,2582,North Dakota
4,Lilia Holland,Lilia,Holland,291,South Carolina
...,...,...,...,...,...
495,Pierre Hickman,Pierre,Hickman,4778,South Dakota
496,Dave Roberson,Dave,Roberson,4185,Montana
497,Susan Watts,Susan,Watts,1614,Nebraska
498,Ezekiel White,Ezekiel,White,2176,Montana


In [13]:
#Create regions based on each state for grouping in visualizations. 

regions = {
'New England' : ['Connecticut', 'Maine', 'Massachusetts', 'New Hampshire', 'Rhode Island', 'Vermont'],
'Middle Atlantic' : ['Delaware', 'Maryland', 'New Jersey', 'New York', 'Pennsylvania'],
'South' : ['Alabama', 'Arkansas', 'Florida', 'Georgia', 'Kentucky', 'Louisiana', 'Mississippi', 'Missouri', 'North Carolina', 'South Carolina', 'Tennessee', 'Virginia', 'West Virginia'],
'Midwest' : ['Illinois', 'Indiana', 'Iowa', 'Kansas', 'Michigan', 'Minnesota', 'Nebraska', 'North Dakota', 'Ohio', 'South Dakota', 'Wisconsin'],
'Southwest' : ['Arizona', 'New Mexico', 'Oklahoma', 'Texas'],
'West' : ['Alaska', 'California', 'Colorado', 'Hawaii', 'Idaho', 'Montana', 'Nevada', 'Oregon', 'Utah', 'Washington', 'Wyoming']
}

In [15]:
# Create new column using the mapped values. 

region_map = {k: v for v, lst in regions.items() for k in lst}
customer['Region'] = customer['State'].map(region_map)

In [16]:
customer

Unnamed: 0,"First, Last Name",First Name,Last Name,Purchase_Amount,State,Region
0,Judith Compton,Judith,Compton,3188,North Carolina,South
1,Mamie Black,Mamie,Black,3339,Minor Outlying Islands,
2,Jewel Russell,Jewel,Russell,1026,U.S. Virgin Islands,
3,Jeanine Waller,Jeanine,Waller,2582,North Dakota,Midwest
4,Lilia Holland,Lilia,Holland,291,South Carolina,South
...,...,...,...,...,...,...
495,Pierre Hickman,Pierre,Hickman,4778,South Dakota,Midwest
496,Dave Roberson,Dave,Roberson,4185,Montana,West
497,Susan Watts,Susan,Watts,1614,Nebraska,Midwest
498,Ezekiel White,Ezekiel,White,2176,Montana,West


In [17]:
# It looks like there are nulls, check for these values. 

customer['Region'].unique()

array(['South', nan, 'Midwest', 'West', 'Middle Atlantic', 'Southwest',
       'New England'], dtype=object)

In [19]:
#Fill na to remove null values. 

customer['Region'] = customer['Region'].fillna('Other')

In [20]:
customer

Unnamed: 0,"First, Last Name",First Name,Last Name,Purchase_Amount,State,Region
0,Judith Compton,Judith,Compton,3188,North Carolina,South
1,Mamie Black,Mamie,Black,3339,Minor Outlying Islands,Other
2,Jewel Russell,Jewel,Russell,1026,U.S. Virgin Islands,Other
3,Jeanine Waller,Jeanine,Waller,2582,North Dakota,Midwest
4,Lilia Holland,Lilia,Holland,291,South Carolina,South
...,...,...,...,...,...,...
495,Pierre Hickman,Pierre,Hickman,4778,South Dakota,Midwest
496,Dave Roberson,Dave,Roberson,4185,Montana,West
497,Susan Watts,Susan,Watts,1614,Nebraska,Midwest
498,Ezekiel White,Ezekiel,White,2176,Montana,West


In [21]:
# Add Season to show potential sales / promotions that occur based on time. 

# Generate purchase locations (at state level) using list of states. 

season = ["Fall", "Winter", "Spring", "Summer"]

rng = np.random.default_rng()
customer['Season'] = rng.choice(season, len(customer))
customer['Season']

0      Summer
1      Summer
2      Summer
3        Fall
4      Spring
        ...  
495    Spring
496    Spring
497    Spring
498      Fall
499    Summer
Name: Season, Length: 500, dtype: object

In [22]:
# Add whether a promotion was applied to the values, obviously this would typically be a more complicated process, but providing as a way to introduce an additional filter for visualization efforts in a simplified fashion. 

promotion = ["Non-Holiday Discount", "Holiday Discount", "None"]

rng = np.random.default_rng()
customer['Promotion'] = rng.choice(promotion, len(customer))
customer['Promotion']

Unnamed: 0,"First, Last Name",First Name,Last Name,Purchase_Amount,State,Region,Season,Promotion
0,Judith Compton,Judith,Compton,3188,North Carolina,South,Summer,Non-Holiday Discount
1,Mamie Black,Mamie,Black,3339,Minor Outlying Islands,Other,Summer,Non-Holiday Discount
2,Jewel Russell,Jewel,Russell,1026,U.S. Virgin Islands,Other,Summer,Non-Holiday Discount
3,Jeanine Waller,Jeanine,Waller,2582,North Dakota,Midwest,Fall,
4,Lilia Holland,Lilia,Holland,291,South Carolina,South,Spring,Holiday Discount
...,...,...,...,...,...,...,...,...
495,Pierre Hickman,Pierre,Hickman,4778,South Dakota,Midwest,Spring,Non-Holiday Discount
496,Dave Roberson,Dave,Roberson,4185,Montana,West,Spring,
497,Susan Watts,Susan,Watts,1614,Nebraska,Midwest,Spring,Non-Holiday Discount
498,Ezekiel White,Ezekiel,White,2176,Montana,West,Fall,Holiday Discount


In [23]:
# Review the finalized dataframe and send to csv

customer

Unnamed: 0,"First, Last Name",First Name,Last Name,Purchase_Amount,State,Region,Season,Promotion
0,Judith Compton,Judith,Compton,3188,North Carolina,South,Summer,Non-Holiday Discount
1,Mamie Black,Mamie,Black,3339,Minor Outlying Islands,Other,Summer,Non-Holiday Discount
2,Jewel Russell,Jewel,Russell,1026,U.S. Virgin Islands,Other,Summer,Non-Holiday Discount
3,Jeanine Waller,Jeanine,Waller,2582,North Dakota,Midwest,Fall,
4,Lilia Holland,Lilia,Holland,291,South Carolina,South,Spring,Holiday Discount
...,...,...,...,...,...,...,...,...
495,Pierre Hickman,Pierre,Hickman,4778,South Dakota,Midwest,Spring,Non-Holiday Discount
496,Dave Roberson,Dave,Roberson,4185,Montana,West,Spring,
497,Susan Watts,Susan,Watts,1614,Nebraska,Midwest,Spring,Non-Holiday Discount
498,Ezekiel White,Ezekiel,White,2176,Montana,West,Fall,Holiday Discount


In [24]:
# send to csv for tableau visualization

customer.to_csv('generatedcustomers.csv', index=False)

The Finalized Tableau visualization can be found here: 