In [56]:
# Importing packages and data needed.
import pandas as pd
import plotly.express as px
# Load the crowdfunding donations CSV; the path is relative to the notebook's working directory.
csv_path = './data/crowdfunding.csv'
df = pd.read_csv(csv_path)
# Preview the first five rows to confirm the file parsed into the expected columns.
df.head(5)

Unnamed: 0,category,device,gender,age,amount
0,Fashion,iOS,F,45-54,61.0
1,Sports,android,M,18-24,31.0
2,Technology,android,M,18-24,39.0
3,Technology,iOS,M,18-24,36.0
4,Sports,android,M,18-24,40.0


In [57]:
# Report the dataset dimensions: df.shape is (row count, column count).
n_rows, n_cols = df.shape
print('This dataset has', n_rows, 'rows and', n_cols, 'columns.')

This dataset has 20658 rows and 5 columns.


In [58]:
# Count the missing (NaN/None) entries in every column of the dataset.
missing_per_column = df.isna().sum()
missing_per_column

category    0
device      0
gender      0
age         0
amount      0
dtype: int64

In [59]:
# Sum the `amount` column to get the overall amount donated across all rows.
total_donated = df['amount'].sum()
print('A total of $', total_donated, ' has been donated, per the dataset.')

A total of $ 814070.0  has been donated, per the dataset.


In [60]:
# Sanity check: a donation amount is only valid when it is strictly positive,
# so flag the column if any single value is zero or negative.
has_nonpositive_amount = (df['amount'] <= 0.0).any()
if not has_nonpositive_amount:
    print('All donations have valid amounts.')
else:
    print('There are invalid values in the amount column.')

All donations have valid amounts.


In [61]:
# List the distinct values (and how many there are) for every object-dtype column.
# Numeric columns such as `amount` are not selected here.
text_columns = df.select_dtypes(include='object_').columns
for col_name in text_columns:
    column = df[col_name]
    print(col_name, ':', column.unique(), '(', column.nunique(), ')')

category : ['Fashion' 'Sports' 'Technology' 'Games' 'Environment'] ( 5 )
device : ['iOS' 'android'] ( 2 )
gender : ['F' 'M' 'U'] ( 3 )
age : ['45-54' '18-24' '35-44' '55+' '25-34'] ( 5 )


In [62]:
# Determining the categories that received the largest total donation amount.
# Only `amount` is selected before summing: calling .sum() on the whole grouped
# frame would also "sum" the string columns (device, gender, age), which is
# string concatenation on recent pandas and a deprecation warning on older ones.
cat_amounts = (
    df.groupby('category', as_index=False)['amount']
      .sum()
      .sort_values('amount', ascending=False)
)
# The frame is sorted descending, so the first three rows are the top three categories.
top3cats = cat_amounts['category'].head(3).values
print('The most donated categories are:', top3cats)

The most donated categories are: ['Games' 'Sports' 'Technology']


In [63]:
# Creating a new filtered dataframe holding only rows from the most donated categories.
# .copy() makes this an independent frame rather than a slice of `df`, so a later
# cell can add columns to it without triggering pandas' SettingWithCopyWarning.
top3cats_df = df.loc[df['category'].isin(top3cats)].copy()
# Fixed random_state so the previewed rows are the same on every Restart & Run All.
top3cats_df.sample(6, random_state=42)

Unnamed: 0,category,device,gender,age,amount
11931,Games,iOS,M,18-24,46.0
5819,Games,iOS,F,18-24,56.0
12953,Games,iOS,F,45-54,67.0
12839,Games,android,M,18-24,30.0
3033,Sports,iOS,F,18-24,30.0
15750,Sports,iOS,M,18-24,53.0


In [64]:
# Calculating the difference in total donations between the device types.
# Only `amount` is summed; summing the whole grouped frame would also aggregate
# the string columns (concatenation on recent pandas, a warning on older ones).
device_amounts = (
    df.groupby('device', as_index=False)['amount']
      .sum()
      .sort_values('amount', ascending=False)
)
# Sorted descending, so the first row is the device with the largest total.
topdevice = device_amounts['device'].head(1).values
# Gap between the largest and smallest per-device totals.
difference = device_amounts['amount'].max() - device_amounts['amount'].min()
print('The most donated device is:', topdevice[0], 'with a difference of $' , difference)

The most donated device is: iOS with a difference of $ 246980.0


In [65]:
# Determining which age bracket donates the most, and its share of the total amount.
# Only `amount` is summed; summing the whole grouped frame would also aggregate
# the string columns (concatenation on recent pandas, a warning on older ones).
ages_amounts = (
    df.groupby('age', as_index=False)['amount']
      .sum()
      .sort_values('amount', ascending=False)
)
topages = ages_amounts['age'].head(1).values
# Each bracket's share of the overall donated amount, as a percentage.
ages_amounts['percent'] = ages_amounts['amount'] / ages_amounts['amount'].sum() * 100
# Use positional .iloc[0] to read the top row: after sort_values the index labels
# keep their pre-sort values, so the label lookup `[0]` is not guaranteed to be
# the highest-donating bracket.
top_share = ages_amounts['percent'].iloc[0]
# The percentage is a share of the donated amount, not of the number of donors.
print('The age bracket donating the most is:', topages[0], ', representing', round(top_share, 2), '% of the total amount donated.')

The age bracket donating the most is: 18-24 , representing 50.5 % of the total donors.


In [66]:
# Creating a shared "age bracket, device" label column.
# .assign returns a new frame, so nothing is written into a slice of `df`
# (avoids pandas' SettingWithCopyWarning) and re-running the cell is safe.
top3cats_df = top3cats_df.assign(
    **{'Age, Device': top3cats_df['age'] + ', ' + top3cats_df['device']}
)

# Grouping by category and age/device label. For each group:
#   amount -> total donated
#   gender -> the most frequent gender (first entry of value_counts)
aggregation = {'amount': 'sum',
               'gender': lambda x: x.value_counts().index[0]}
top3cats_grp = (
    top3cats_df.groupby(['category', 'Age, Device'], as_index=False)
               .agg(aggregation)
               .sort_values('Age, Device')
)

# Creating Plotly Express polar bar chart.
layout_dict = dict(font_family="segoe ui",
                   font_size=15,
                   title_font_size=25,
                   legend_font_size=16,
                   title_x=0.5)
# Angular axis order: each label once, in sorted order (unique() keeps first-seen order).
x_order = {"Age, Device": top3cats_grp['Age, Device'].unique().tolist()}
# Explicit gender -> colour mapping. A positional colour sequence would hand out
# pink/blue by order of appearance in the data, and has no entry for 'U'.
gender_colors = {'F': 'pink', 'M': 'blue', 'U': 'gray'}
fig = px.bar_polar(top3cats_grp,
                   r="amount",
                   theta="Age, Device",
                   color="gender",
                   color_discrete_map=gender_colors,
                   category_orders=x_order,
                   hover_data=['category', 'amount'],
                   height=750,
                   width=750,
                   title='Total Donations by Age Bracket & Device Type')
fig.update_layout(layout_dict)
fig