# Data Analysis Results for Non-Technical Team

### Imports

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import ipywidgets as widgets
from IPython.display import display
import data_analysis

### Load Data 

In [2]:
def load_data():
    """Read every conformed CSV export under ``new_data/`` into a DataFrame.

    Returns:
        dict: Dataset name (for example ``'people'``) mapped to the
        DataFrame read from that dataset's CSV file.
    """
    # One entry per dataset; the key doubles as the name callers look up.
    csv_paths = {
        'item_summary': 'new_data/item_summary_df.csv',
        'people': 'new_data/people_df.csv',
        'promotions': 'new_data/promotions_df.csv',
        'store_summary': 'new_data/store_summary_df.csv',
        'transactions': 'new_data/transactions_df.csv',
        'transfers': 'new_data/transfers_df.csv',
        'user_transactions': 'new_data/user_transactions_df.csv',
        'user_transfers': 'new_data/user_transfers_df.csv',
    }

    return {name: pd.read_csv(path) for name, path in csv_paths.items()}

# Load everything once, then bind each table to its own module-level name so
# the cells below can reference the DataFrames directly.
data = load_data()
(
    item_summary_df,
    people_df,
    promotions_df,
    store_summary_df,
    transactions_df,
    transfers_df,
    user_transactions_df,
    user_transfers_df,
) = (
    data[key]
    for key in (
        'item_summary',
        'people',
        'promotions',
        'store_summary',
        'transactions',
        'transfers',
        'user_transactions',
        'user_transfers',
    )
)

### Promotion Response Count

In [None]:
def promotion_response_counts():
    """Draw a bar chart of the 'Yes' and 'No' promotion response totals.

    Each bar's height is the value ``data_analysis.promotion_responded``
    returns for that answer when given ``promotions_df``.
    """
    answers = ['Yes', 'No']
    # Query in the same order as the labels so bars and ticks stay aligned.
    totals = [
        data_analysis.promotion_responded(promotions_df, answer)
        for answer in answers
    ]

    plt.figure(figsize=(8, 5))
    response_bars = plt.bar(answers, totals, color=['green', 'red'])
    plt.bar_label(response_bars)  # annotate each bar with its value
    plt.xlabel('Response')
    plt.ylabel('Count')
    plt.title('Promotion Response Count')
    plt.show()


promotion_response_counts()

### Promotion Acceptance Rate

In [None]:
def client_response_rate():
    """Draw a pie chart of the promotion response rates labelled Yes / No.

    The slice values come from ``data_analysis.client_response_rate``.
    """
    rates = data_analysis.client_response_rate(promotions_df)

    # NOTE(review): assumes index 1 is the 'Yes' rate and index 0 the 'No'
    # rate -- confirm against data_analysis.client_response_rate.
    yes_rate = rates[1]
    no_rate = rates[0]

    plt.figure(figsize=(8, 5))
    plt.pie(
        [yes_rate, no_rate],
        labels=['Yes', 'No'],
        colors=['green', 'red'],
        autopct='%1.1f%%',
    )
    plt.axis('equal')  # equal aspect ratio so the pie is drawn as a circle
    plt.title("Promotions' acceptance rate")
    plt.show()


client_response_rate()

### Promotions per country

In [None]:
def promotions_per_country():
    """Draw a bar chart of the 'count' column per promotion and country."""
    counts_long = data_analysis.promotions_by_type_of_clients(promotions_df, people_df)
    # Reshape to one row per promotion and one column per country so that
    # DataFrame.plot draws one bar series per country.
    counts_wide = counts_long.pivot(index='promotion', columns='country', values='count')
    counts_wide.plot(
        kind='bar',
        figsize=(10, 6),
        color=['red', 'green', 'purple', 'blue', 'orange'],
        width=0.6,
    )

    plt.xlabel('Promotions')
    plt.ylabel('Count')
    plt.title('Promotions per Country')
    plt.xticks(rotation=45)
    plt.yticks(ticks=[0, 4, 8, 12, 16, 20])
    # Anchor the legend outside the axes, to the right of the plot area.
    plt.legend(title='Country', bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.tight_layout()
    plt.show()


promotions_per_country()

### Items sold per country

In [None]:
# Print a heading, then leave the call as the cell's final bare expression so
# that, when run as a notebook cell, its return value is rendered as output.
print("Items sold per country")
data_analysis.items_sold_per_country(transactions_df, people_df)


### Acceptance rate of promotions per country

In [None]:
from matplotlib.ticker import PercentFormatter

def promotions_per_country_with_acceptance_rate():
    """Draw a bar chart of 'acceptance_rate' per promotion and country."""
    rates_long = data_analysis.promotions_by_type_of_clients_with_acceptance_rate(
        promotions_df, people_df
    )
    # One row per promotion, one column per country.
    rates_wide = rates_long.pivot(
        index='promotion', columns='country', values='acceptance_rate'
    )
    axes = rates_wide.plot(
        kind='bar',
        figsize=(10, 6),
        color=['red', 'green', 'purple', 'blue', 'orange'],
        width=0.6,
    )

    plt.xlabel('Promotions')
    plt.ylabel('Acceptance Rate')
    plt.title('Acceptance Rate of Promotions per Country')
    plt.xticks(rotation=45)
    # Fixed ticks every 10 units from 0 to 100, rendered with a percent sign.
    plt.yticks(ticks=list(range(0, 101, 10)))
    axes.yaxis.set_major_formatter(PercentFormatter())
    plt.legend(title='Country', bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.tight_layout()
    plt.show()


promotions_per_country_with_acceptance_rate()

### Highest spending clients and their promotions

In [None]:
# Print a heading, then leave the call as the cell's final bare expression so
# that, when run as a notebook cell, its return value is rendered as output.
print('Highest spending clients and their promotions:')
data_analysis.possible_conversion_strategy(user_transactions_df, promotions_df, people_df)

### Top n selling items

In [None]:
def plot_top_selling_items(n):
    """Plot the result of ``data_analysis.top_selling_items`` as a bar chart.

    Args:
        n: Number of items to request; also shown in the chart title.
    """
    top_selling_items = data_analysis.top_selling_items(item_summary_df, n)

    plt.figure(figsize=(10, 6))
    bars = plt.bar(
        top_selling_items['item'],
        top_selling_items['items_sold'],
        color='blue',
    )
    # bar_label returns the label annotations, not the bars, so its result is
    # not assigned back to ``bars``.
    plt.bar_label(bars)
    plt.title(f'Top {n} Selling Items')
    plt.xlabel('Items')
    plt.ylabel('Quantity Sold')
    plt.xticks(rotation=45)
    plt.show()


n_widget = widgets.IntSlider(value=5, min=1, max=5, step=1, description='Top n items')
# Trailing semicolon: stops the notebook from echoing interact's return value
# (the function repr) beneath the chart.
widgets.interact(plot_top_selling_items, n=n_widget);

### Which clients have what type of promotions?

From the data shown above, we can draw several insights about which clients receive which types of promotions. As seen in the "Promotions per Country" graph, promotions are heavily focused on the USA, with Canada coming second in most cases. Clients in the USA are far more likely to receive every type of promotion than clients in any other country.

### Suggestions on how to turn "No" responses from clients in the promotions file into "Yes"

- Looking at the table "Items sold per country", we can see that no transactions are happening outside the USA. The promotion counts also show that there is little promotional focus outside the USA. A good start would be to promote to, or cater towards, the countries that show a low acceptance rate of promotions. For example, Coca-Splash has a 0% acceptance rate in Canada, Spain and the UK. Likewise, Colgatex has a 0% acceptance rate in Canada and Spain. Although the sample size there is smaller, increasing the number of promotions and focusing more on these low-acceptance places outside the USA can help increase the number of "Yes" responses from clients.
- Another approach would be to increase promotions on the top-selling items. We can see that the most promoted item in the US is OREOZ, but it is bought far less often than Dovee. Shifting the focus towards promoting Dovee, which is also the item bought the most, should make people more likely to respond "Yes" than promotions for items they might not need.
- The table "Highest spending clients and their promotions" shows that most of the top spenders haven't received promotions. Since they are actively spending, sending promotions their way would probably entice them to accept, and would help increase the number of "Yes" responses.
- For items with low promotion acceptance rates, offering discounts or bundles could help turn "No" responses into "Yes".

### People that bought the promoted item after responding "Yes" to a promotion

In [None]:
# Final bare expression of the cell: when run as a notebook cell, the call's
# return value is rendered as the cell output.
data_analysis.promotion_response_yes_buyer(promotions_df, transactions_df)

The output above contains only one record. The method that was called is meant to output every person who bought the item they responded "Yes" to in promotions_df. In other words, only 1 person, accounting for around 2% of the people who accepted promotions, bought the promoted item after responding "Yes".

### Top n items promoted

In [None]:
def plot_top_promoted_items(n):
    """Plot the result of ``data_analysis.analyze_client_promotions`` as bars.

    Args:
        n: Number of promoted items to request; also shown in the chart title.
    """
    top_promoted_items = data_analysis.analyze_client_promotions(promotions_df, people_df, n)

    plt.figure(figsize=(10, 6))
    bars = plt.bar(
        top_promoted_items['promotion'],
        top_promoted_items['client_count'],
        color='blue',
    )
    # bar_label returns the label annotations, not the bars, so its result is
    # not assigned back to ``bars``.
    plt.bar_label(bars)
    plt.title(f'Top {n} Promoted Items')
    plt.xlabel('Items Promoted')
    plt.ylabel('Client Count')
    plt.xticks(rotation=45)
    plt.show()


n_widget = widgets.IntSlider(value=5, min=1, max=5, step=1, description='Top n items')
# Trailing semicolon: stops the notebook from echoing interact's return value
# (the function repr) beneath the chart.
widgets.interact(plot_top_promoted_items, n=n_widget);

### Top n most profitable items

In [None]:
def plot_most_profitable_items(n):
    """Plot the result of ``data_analysis.most_profitable_items`` as bars.

    Args:
        n: Number of items to request; also shown in the chart title.
    """
    most_profitable_items = data_analysis.most_profitable_items(item_summary_df, n)

    plt.figure(figsize=(10, 6))
    bars = plt.bar(
        most_profitable_items['item'],
        most_profitable_items['total_revenue'],
        color='blue',
    )
    # bar_label returns the label annotations, not the bars, so its result is
    # not assigned back to ``bars``.
    plt.bar_label(bars)
    plt.title(f'Top {n} Most Profitable Items')
    plt.xlabel('Items')
    plt.ylabel('Revenue')
    plt.xticks(rotation=45)
    plt.show()


n_widget = widgets.IntSlider(value=5, min=1, max=5, step=1, description='Top n items')
# Trailing semicolon: stops the notebook from echoing interact's return value
# (the function repr) beneath the chart.
widgets.interact(plot_most_profitable_items, n=n_widget);

### Top n most profitable stores

In [None]:
def plot_most_profitable_stores(n):
    """Plot the result of ``data_analysis.most_profitable_stores`` as bars.

    Args:
        n: Number of stores to request; also shown in the chart title.
    """
    most_profitable_stores = data_analysis.most_profitable_stores(store_summary_df, n)

    plt.figure(figsize=(10, 6))
    bars = plt.bar(
        most_profitable_stores['store'],
        most_profitable_stores['total_revenue'],
        color='blue',
    )
    # bar_label returns the label annotations, not the bars, so its result is
    # not assigned back to ``bars``.
    plt.bar_label(bars)
    plt.title(f'Top {n} Most Profitable Stores')
    plt.xlabel('Stores')
    plt.ylabel('Revenue')
    plt.xticks(rotation=45)
    plt.show()


n_widget = widgets.IntSlider(value=5, min=1, max=5, step=1, description='Top n stores')
# Trailing semicolon: stops the notebook from echoing interact's return value
# (the function repr) beneath the chart.
widgets.interact(plot_most_profitable_stores, n=n_widget);

### Top n stores with most transactions

In [None]:
def plot_most_transactions_stores(n):
    """Plot the result of ``data_analysis.most_bought_from_stores`` as bars.

    Args:
        n: Number of stores to request; also shown in the chart title.
    """
    most_transactions_stores = data_analysis.most_bought_from_stores(store_summary_df, n)

    plt.figure(figsize=(10, 6))
    bars = plt.bar(
        most_transactions_stores['store'],
        most_transactions_stores['total_transactions'],
        color='blue',
    )
    # bar_label returns the label annotations, not the bars, so its result is
    # not assigned back to ``bars``.
    plt.bar_label(bars)
    plt.title(f'Top {n} Stores with Most Transactions')
    plt.xlabel('Stores')
    plt.ylabel('Transactions')
    plt.xticks(rotation=45)
    plt.show()


n_widget = widgets.IntSlider(value=5, min=1, max=5, step=1, description='Top n stores')
# Trailing semicolon: stops the notebook from echoing interact's return value
# (the function repr) beneath the chart.
widgets.interact(plot_most_transactions_stores, n=n_widget);

### User's favorite store

In [None]:
# BoundedIntText is used because plain IntText has no min/max traits, so
# bounds passed to it are not enforced.
widget_user = widgets.BoundedIntText(value=1, min=1, max=1000, description='User ID:')


def show_favorite_store(user_id):
    """Print the favorite store for ``user_id``, or a notice if none exists.

    Args:
        user_id: ID passed to ``data_analysis.favorite_store_by_user_id``
            together with ``user_transactions_df``.
    """
    favorite_store = data_analysis.favorite_store_by_user_id(user_transactions_df, user_id)
    # A missing (NaN/None) result is reported as "no transactions".
    if pd.isna(favorite_store):
        print(f"User {user_id} has not made any transactions at any store")
    else:
        print(f"User {user_id}'s favorite store is {favorite_store}")


# Trailing semicolon keeps the notebook from echoing interact's return value.
widgets.interact(show_favorite_store, user_id=widget_user);

### Top n users with the most sent amount

In [None]:
def plot_top_senders(n):
    """Plot the result of ``data_analysis.top_senders`` as a bar chart.

    Bars are labelled with each row's first and last name joined by a space.

    Args:
        n: Number of users to request; also shown in the chart title.
    """
    top_senders = data_analysis.top_senders(user_transfers_df, people_df, n)
    full_names = top_senders['first_name'] + ' ' + top_senders['last_name']

    plt.figure(figsize=(10, 6))
    bars = plt.bar(full_names, top_senders['total_sent'], color='blue')
    # bar_label returns the label annotations, not the bars, so its result is
    # not assigned back to ``bars``.
    plt.bar_label(bars)
    plt.title(f'Top {n} Users with the most sent amount')
    plt.xlabel('Users')
    plt.ylabel('Amount Sent (USD)')

    plt.xticks(rotation=45)
    plt.show()


n_widget = widgets.IntSlider(value=3, min=1, max=10, step=1, description='Top n users')
# Trailing semicolon: stops the notebook from echoing interact's return value
# (the function repr) beneath the chart.
widgets.interact(plot_top_senders, n=n_widget);

### Top n users with the most received amount

In [None]:
def plot_top_receivers(n):
    """Plot the result of ``data_analysis.top_receivers`` as a bar chart.

    Bars are labelled with each row's first and last name joined by a space.

    Args:
        n: Number of users to request; also shown in the chart title.
    """
    top_receivers = data_analysis.top_receivers(user_transfers_df, people_df, n)
    full_names = top_receivers['first_name'] + ' ' + top_receivers['last_name']

    plt.figure(figsize=(10, 6))
    bars = plt.bar(full_names, top_receivers['total_received'], color='blue')
    # bar_label returns the label annotations, not the bars, so its result is
    # not assigned back to ``bars``.
    plt.bar_label(bars)
    plt.title(f'Top {n} Users with the most received amount')
    plt.xlabel('Users')
    plt.ylabel('Amount Received (USD)')

    plt.xticks(rotation=45)
    plt.show()


n_widget = widgets.IntSlider(value=3, min=1, max=10, step=1, description='Top n users')
# Trailing semicolon: stops the notebook from echoing interact's return value
# (the function repr) beneath the chart.
widgets.interact(plot_top_receivers, n=n_widget);

### Top n cities by money spent

In [None]:
def plot_top_cities_by_amount_spent(n):
    """Plot the result of ``data_analysis.total_spent_by_city`` as bars.

    Args:
        n: Number of cities to request; also shown in the chart title.
    """
    top_cities = data_analysis.total_spent_by_city(user_transactions_df, people_df, n)

    plt.figure(figsize=(10, 6))
    bars = plt.bar(top_cities['city'], top_cities['total_spent'], color='blue')
    # bar_label returns the label annotations, not the bars, so its result is
    # not assigned back to ``bars``.
    plt.bar_label(bars)
    plt.title(f'Top {n} Cities by Total Amount Spent (USD) by Users')
    plt.xlabel('Cities')
    plt.ylabel('Amount Spent (USD)')

    plt.xticks(rotation=45)
    plt.show()


n_widget = widgets.IntSlider(value=3, min=1, max=5, step=1, description='Top n cities')
# Trailing semicolon: stops the notebook from echoing interact's return value
# (the function repr) beneath the chart.
widgets.interact(plot_top_cities_by_amount_spent, n=n_widget);

### Top n cities by transactions done by users

In [None]:
def plot_top_cities_by_transactions_done(n):
    """Plot the result of ``data_analysis.total_transactions_by_city`` as bars.

    Args:
        n: Number of cities to request; also shown in the chart title.
    """
    top_cities = data_analysis.total_transactions_by_city(user_transactions_df, people_df, n)

    plt.figure(figsize=(10, 6))
    bars = plt.bar(top_cities['city'], top_cities['transaction_count'], color='blue')
    # bar_label returns the label annotations, not the bars, so its result is
    # not assigned back to ``bars``.
    plt.bar_label(bars)
    plt.title(f'Top {n} Cities by total transactions done by users')
    plt.xlabel('Cities')
    plt.ylabel('Transactions')

    plt.xticks(rotation=45)
    plt.show()


n_widget = widgets.IntSlider(value=3, min=1, max=5, step=1, description='Top n cities')
# Trailing semicolon: stops the notebook from echoing interact's return value
# (the function repr) beneath the chart.
widgets.interact(plot_top_cities_by_transactions_done, n=n_widget);