View Full Notebook Here: https://nbviewer.jupyter.org/github/dgustave/ebay-auction/blob/master/notebooks/ebay.ipynb

In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as stats
import math
from pandas_profiling import ProfileReport as pr
import plotly.express as px
import os

In [None]:
# Load the auction CSV (path is relative to the notebook's working directory)
# into the DataFrame used by the cells below.
ebay_df = pd.read_csv('../input/online-auctions-dataset/auction.csv')

In [None]:
# Show the loaded frame as the cell's rich output.
ebay_df

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the frame.
ebay_df.describe()

In [None]:
# Build a profiling report (`pr` is pandas_profiling.ProfileReport) for the
# whole frame and save it as an HTML file in the current directory.
prof = pr(ebay_df)
prof.to_file(output_file='./ebay_report.html')

In [None]:
# Render the profiling report as this cell's output.
prof

<div class="alert alert-block alert-info">
    <b>Things to know:</b><br> 
    
* auctionid: unique identifier of an auction<br>

* bid: the proxy bid placed by a bidder <br>

* bidtime: the time in days that the bid was placed, from the start of the auction<br>

* bidder: eBay username of the bidder<br>

* bidderrate: eBay feedback rating of the bidder<br>

* openbid: the opening bid set by the seller<br>

* price: the closing price that the item sold for (equivalent to the second highest bid + an increment)<br>

* item: auction item<br>

* auction_type<br>
</div>

# Inquiries in mind:

<div class="alert alert-block alert-info">
    <b>How does it work?</b><br> In an auction-style listing, sellers name a starting price and you bid against other buyers. You can keep track of your bidding from the Bids/Offers section of My eBay. When the listing ends, the highest bidder wins the auction and pays for the item. Remember, a bid is a binding contract.
</div>

<div class="alert alert-block alert-info">
    <b>Ideas Worth Exploring:</b> For each item, what is the relationship between bids, bid time, and the closing price? Does this differ by length of the auction, opening bid, or by bidder rating?
</div>

<div class="alert alert-block alert-info">
    <b>From a buyer perspective:</b><br> Which types of auctions start with the smallest opening bid?<br>
    Which types are usually the priciest?<br>
</div>

<div class="alert alert-block alert-info">
    <b>As a seller, on the other hand, I would be interested to know:</b><br> Which types of auctions are more common and more profitable?<br>
    Should I start an auction with a higher or lower opening bid (to attract more bidders)?<br>
</div>

### Display the change in the mean of key values over bid time

In [None]:
# Average every numeric column for each distinct bid time.
# numeric_only=True keeps non-numeric columns (e.g. the item name) out of the
# mean; pandas >= 2.0 raises TypeError when asked to average them.
ebay_mean_df = ebay_df.groupby("bidtime").mean(numeric_only=True)
ebay_mean_df

### Get minimum and maximum values:

In [None]:
# Earliest and latest bid times (in days from auction start) across all bids.
bidtime_column = ebay_df['bidtime']
bidtime_min = bidtime_column.min()
bidtime_max = bidtime_column.max()

<div class="alert alert-block alert-warning">
    <b>The relationship between bids, bid time, and the closing price:</b><br> 
</div>

<div class="alert alert-block alert-info">
    <b>OB:</b> Open Bid<br>
    <b>PDA_OB:</b> Open Bid for Palm Pilot M515 PDA<br>
    <b>bt:</b> Bid Time <br>
    <b>d:</b> number of days of auction ≈ 7<br>
    <b>d3:</b> bids placed less than 3 days into the auction (minimum ≈ 0) <br>
    <b>d7:</b> bids placed more than 3 days into the auction (maximum ≈ 7)<br>
    <b>n:</b> sample size <br>
</div>

In [None]:
# Opening-bid statistics for bids placed before day 3 (the `== bidtime_min`
# term is kept from the original filter; it only adds rows if the minimum
# bid time is itself 3 or more).
OB_bt_d3 = ebay_df[(ebay_df['bidtime'] == bidtime_min) | (ebay_df['bidtime'] < 3)]['openbid']

# describe() is label-indexed ('count', 'mean', 'std', ...). Look the values up
# by label: integer keys on a labelled Series rely on a deprecated positional
# fallback. Computing the summary once also avoids repeating the work.
OB_bt_d3_stats = OB_bt_d3.describe()
OB_bt_mean_d3 = OB_bt_d3_stats['mean']
OB_bt_std_d3 = OB_bt_d3_stats['std']
OB_bt_n_d3 = OB_bt_d3_stats['count']
OB_bt_d3_stats

In [None]:
# Opening-bid statistics for bids placed after day 3 (the `== bidtime_max`
# term is kept from the original filter; it only adds rows if the maximum
# bid time is itself 3 or less).
# NOTE(review): a bid at exactly bidtime == 3 matches neither `< 3` nor `> 3`.
OB_bt_d7 = ebay_df[(ebay_df['bidtime'] == bidtime_max) | (ebay_df['bidtime'] > 3)]['openbid']

# Index the describe() summary by label rather than by position (positional
# integer keys on a labelled Series are deprecated) and compute it only once.
OB_bt_d7_stats = OB_bt_d7.describe()
OB_bt_mean_d7 = OB_bt_d7_stats['mean']
OB_bt_std_d7 = OB_bt_d7_stats['std']
OB_bt_n_d7 = OB_bt_d7_stats['count']
OB_bt_d7_stats

In [None]:
plt.style.use('fivethirtyeight')

# Mean opening bid and mean closing price at each distinct bid time.
x_axis = ebay_mean_df.index
y_open_bid = ebay_mean_df['openbid']
y_price = ebay_mean_df['price']

# Two stacked panels: opening bid on top, closing price below.
fig, axs = plt.subplots(2)
fig.suptitle('Bid Price Change Over Time(Days)')
axs[0].plot(x_axis, y_open_bid)
axs[0].set(ylabel="Open Bid")
axs[1].plot(x_axis, y_price)
axs[1].set(ylabel="Closing Bid", xlabel="bidtime")

print(f"Initial Bid mean: {OB_bt_mean_d3}")
print(f"Final Bid mean: {OB_bt_mean_d7}")
print(f"Delta Bid mean: {OB_bt_mean_d7 - OB_bt_mean_d3}")

# Save BEFORE show(): once show() has rendered the figure, a following
# plt.savefig() operates on a fresh, empty figure and writes a blank image.
fig.savefig('./Bid_Price_Change.png')
plt.show()

In [None]:
# Keep only the Palm Pilot M515 PDA rows. reset_index() is called without
# drop=True, so the previous row labels are kept as a new 'index' column.
pda_df = ebay_df[ebay_df['item'] == 'Palm Pilot M515 PDA'].reset_index()

In [None]:
# Average every numeric PDA column for each distinct bid time.
# numeric_only=True keeps non-numeric columns (e.g. the item name) out of the
# mean; pandas >= 2.0 raises TypeError when asked to average them.
pda_mean_df = pda_df.groupby("bidtime").mean(numeric_only=True)
pda_mean_df

<div class="alert alert-block alert-warning">
    <b>Palm Pilot M515 PDA: </b>The relationship between bids, bid time, and the closing price.<br> 
</div>

In [None]:
# Earliest and latest bid times within the PDA subset.
pda_min = pda_df['bidtime'].min()
pda_max = pda_df['bidtime'].max()

# Opening-bid statistics for PDA bids placed before day 3.
# The filter uses the PDA subset's own minimum (pda_min); previously it used
# the all-items bidtime_min and left pda_min unused.
PDA_OB_bt_d3 = pda_df[(pda_df['bidtime'] == pda_min) | (pda_df['bidtime'] < 3)]['openbid']
# Index the describe() summary by label: integer keys on a labelled Series
# rely on a deprecated positional fallback.
PDA_OB_bt_d3_stats = PDA_OB_bt_d3.describe()
PDA_OB_bt_mean_d3 = PDA_OB_bt_d3_stats['mean']
PDA_OB_bt_std_d3 = PDA_OB_bt_d3_stats['std']
PDA_OB_bt_n_d3 = PDA_OB_bt_d3_stats['count']

# Opening-bid statistics for PDA bids placed after day 3, using the PDA
# subset's own maximum (pda_max).
PDA_OB_bt_d7 = pda_df[(pda_df['bidtime'] == pda_max) | (pda_df['bidtime'] > 3)]['openbid']
PDA_OB_bt_d7_stats = PDA_OB_bt_d7.describe()
PDA_OB_bt_mean_d7 = PDA_OB_bt_d7_stats['mean']
PDA_OB_bt_std_d7 = PDA_OB_bt_d7_stats['std']
PDA_OB_bt_n_d7 = PDA_OB_bt_d7_stats['count']

In [None]:
plt.style.use('fivethirtyeight')

# Mean PDA opening bid and mean PDA closing price at each distinct bid time.
x_axis = pda_mean_df.index
y_open_bid = pda_mean_df['openbid']
y_price = pda_mean_df['price']

# Two stacked panels: opening bid on top, closing price below.
fig, axs = plt.subplots(2)
fig.suptitle('PDA Bid Price Change Over Time(Days)')
axs[0].plot(x_axis, y_open_bid)
axs[0].set(ylabel="Open PDA Bid")
axs[1].plot(x_axis, y_price)
axs[1].set(ylabel="Closing PDA Bid", xlabel="bidtime")

print(f"Initial PDA Bid mean: {PDA_OB_bt_mean_d3}")
print(f"Final PDA Bid mean: {PDA_OB_bt_mean_d7}")
print(f"Delta PDA Bid mean: {PDA_OB_bt_mean_d7 - PDA_OB_bt_mean_d3}")

# Save BEFORE show(): once show() has rendered the figure, a following
# plt.savefig() operates on a fresh, empty figure and writes a blank image.
fig.savefig('./PDA_bid_Price_Change.png')
plt.show()

<div class="alert alert-block alert-success">
    <b>The relationship between bids, bid time, and the closing price:</b><br> 
    Auction-style format: 1, 3, 5, 7, and 10 days. Unsold items may be automatically relisted. On average, most items close above their opening bid, and by the sixth and seventh day 
        the price has risen above the opening bid, so the item sells for a better price. Unwanted inventory may be sold for less than the opening bid just to clear it, which would account for our outliers. 
</div>

<div class="alert alert-block alert-success">
    <b>From a buyer perspective:</b><br> Which types of auctions start with the smallest opening bid?<br>
    Which types are usually the priciest?<br>
</div>

<div class="alert alert-block alert-success">
    <b>As a seller, on the other hand, I would be interested to know:</b><br> Which types of auctions are more common and more profitable?<br>
</div>

In [None]:
# One row per auction carrying its opening bid, closing price and auction type.
auction_type = (
    ebay_df
    .loc[:, ['auctionid', 'openbid', 'price', 'auction_type']]
    .drop_duplicates(subset='auctionid')
)

# Group all bids by auction; reused by the aggregation cells that follow.
auction_grouped = ebay_df.groupby('auctionid')

# Number of non-null price rows (i.e. bids) recorded for each auction.
atype_count = auction_grouped['price'].count().reset_index(name='count')
atype_count.head()

In [None]:
# Get Aggregates of numeric categories except for bids then rename columns 
a_bid = auction_grouped['bid'].agg([np.min, np.max, np.mean]).reset_index().rename(columns={"amin": "bid_min", "amax": "bid_max", "mean": "bid_mean",})
a_bid.head()

In [None]:
# Get Aggregates of numeric categories except for bidtime then rename columns 
a_bidtime = auction_grouped['bidtime'].agg([np.min, np.max, np.mean]).reset_index().rename(columns={"amin": "bidtime_min", "amax": "bidtime_max", "mean": "bidtime_mean",})
a_bidtime.head()

In [None]:
# Per-auction min / max / mean of the bidders' feedback rating.
# Named aggregation sets the output column names directly. The previous
# rename keyed on "amin"/"amax", which are NumPy-internal function names that
# newer NumPy reports as "min"/"max", so the rename could silently do nothing.
a_bidderrate = auction_grouped['bidderrate'].agg(bidderrate_min='min', bidderrate_max='max', bidderrate_mean='mean').reset_index()
a_bidderrate.head()

In [None]:
# Assemble one wide per-auction table: start from the bid counts and
# left-join the auction attributes and each aggregate frame on auctionid.
auction_all = (
    atype_count
    .merge(auction_type, how='left', on='auctionid')
    .merge(a_bid, how='left', on='auctionid')
    .merge(a_bidtime, how='left', on='auctionid')
    .merge(a_bidderrate, how='left', on='auctionid')
)
auction_all.head()

In [None]:
# Mean of the key per-auction metrics for each auction type.
# Uses the built-in .mean() rather than agg(np.mean): passing NumPy callables
# to agg is deprecated in recent pandas and emits a FutureWarning.
auction_all_type = (
    auction_all
    .groupby('auction_type')[['count', 'openbid', 'price', 'bidtime_max', 'bidderrate_mean']]
    .mean()
    .reset_index()
)
auction_all_type

In [None]:
# Opening bid vs. closing price for each auction, one facet per auction type,
# with marker size and animation frame both driven by the per-auction 'count'.
# NOTE(review): write_image produces a static file and presumably needs
# plotly's image-export backend installed — confirm in the target environment.
fig = px.scatter(auction_all, x="openbid", y="price", animation_frame="count", animation_group="auction_type",
           size="count", size_max= 60, color="auction_type", hover_name="auctionid", facet_col="auction_type",
           range_x=[-100, 6000], range_y=[-100, 6000])
fig.show()
fig.write_image('./scatter_animation.png')

<div class="alert alert-block alert-success">
    <b>Getting bidders with good ratings:</b><br> 
    The bidders with the best ratings bid on day 3; there is no such thing as a guaranteed sale. 
</div>

In [None]:
# Notched box plot of the highest bidder rating seen in each auction,
# split and coloured by auction type; shown inline and saved as a PNG.
fig = px.box(auction_all, x="auction_type", y="bidderrate_max", color="auction_type", notched=True)
fig.show()
fig.write_image('./bid_rates.png')

In [None]:
# Opening bid against the highest bidder rating per auction, coloured by
# auction type; shown inline and saved as a PNG.
fig = px.scatter(auction_all, x="openbid", y="bidderrate_max", color="auction_type")
fig.show()
fig.write_image('./bid_rates_scatter.png')

<div class="alert alert-block alert-success">
    <b>Should I start an auction with a higher or lower opening bid (to attract more bidders)?:</b><br> 
    Overall, as a seller you get more bids when starting at a lower price, although this depends on the item. 
</div>

In [None]:
# Opening bid vs. closing price per auction, with a histogram of opening bids
# on the x margin and a box plot of closing prices on the y margin.
fig = px.scatter(auction_all, x='openbid', y='price', marginal_x="histogram", marginal_y="box")
fig.show()
fig.write_image('./bid_hist.png')

In [None]:
# List the distinct item names present in the dataset.
ebay_df['item'].unique()

<div class="alert alert-block alert-success">
    <b>How many auctions for each auction type and item?:</b><br> 
        It would be hard to compete with so many sellers. However, in the long run 7-day auctions would boost the price and the number of buyers, so it would be better to hold until the 7th day. 
    The PDA seems to get the most buyers overall. 
</div>

In [None]:
# Count auctions per (auction type, item): keep one row per auction, one-hot
# encode the item name, then sum the indicator columns within each type.
item_columns = ['item_Cartier wristwatch', 'item_Palm Pilot M515 PDA', 'item_Xbox game console']
auctions_one_hot = pd.get_dummies(ebay_df.drop_duplicates(subset='auctionid'), columns=['item'])
count_type_item = auctions_one_hot.groupby(by=['auction_type'])[item_columns].sum()

In [None]:
# Column totals (auctions per item across all auction types), then the
# per-auction-type table and the same data as a grouped bar chart saved as PNG.
print(count_type_item.sum(axis=0))
display(count_type_item)
count_type_item.plot.bar()
plt.savefig('./bid_items.png')

In [None]:
def p_value(sample_array, population_array, tails=2):
    """Empirical p-value and z-score of a sample mean against a population.

    The z-score is ``(sample_mean - mu) / sigma`` using the population mean
    and population standard deviation. The p-value is the fraction of
    population values that do NOT lie strictly inside the interval
    ``mu ± |sample_mean - mu|``.

    Parameters
    ----------
    sample_array : array-like of numbers
        Observations whose mean is being tested.
    population_array : array-like of numbers
        Reference population.
    tails : int, default 2
        Pass 1 to halve the p-value (one-tailed); any other value leaves the
        two-tailed p-value unchanged.

    Returns
    -------
    dict
        ``{"p-value": float, "z-score": float}``.

    Raises
    ------
    ValueError
        If either input is empty.
    """
    sample = np.asarray(sample_array, dtype=float)
    population = np.asarray(population_array, dtype=float)
    if sample.size == 0 or population.size == 0:
        raise ValueError("sample_array and population_array must not be empty")

    sample_mean = float(np.average(sample))
    mu = float(np.average(population))
    sigma = math.sqrt(float(np.var(population)))
    deviation = sample_mean - mu

    if sigma == 0:
        # Constant population: dividing by sigma would give NaN/inf noise.
        # Report 0 when the means agree, otherwise a signed infinity.
        if math.isnan(deviation):
            z_score = math.nan
        elif deviation == 0:
            z_score = 0.0
        else:
            z_score = math.copysign(math.inf, deviation)
    else:
        z_score = deviation / sigma

    # |z| * sigma is just |sample_mean - mu|; using it directly avoids the
    # divide-then-multiply round trip and its rounding error at the edges.
    half_width = abs(deviation)
    inside = np.count_nonzero(
        (population > mu - half_width) & (population < mu + half_width)
    )
    pvalue = 1 - inside / population.size

    if tails == 1:
        pvalue = pvalue / 2
    return {"p-value": pvalue, "z-score": z_score}

In [None]:
# Test the PDA opening bids from the d3 subset (bid time < 3) against the
# all-items opening bids from the same window; default is two-tailed.
htest = p_value(PDA_OB_bt_d3, OB_bt_d3)
htest

There is only a 10% chance that the null hypothesis is true at the outset. Consequently, the probability of rejecting a true null at the conclusion of the test must be less than 10%. ... It shows that the decrease from the initial probability to the final probability of a true null depends on the P value.

### H<sub>0</sub>: X̄<sub>BT at d&lt;3</sub> = μ<sub>BT at d&lt;3</sub>
The Null Hypothesis assumes there will be no difference between the PDA open price bid time (BT) average and the all items average at d&lt;3 (less than 3 days).

### H<sub>A</sub>: X̄<sub>BT at d&gt;3</sub> != μ<sub>BT at d&gt;3</sub>
The Alternative Hypothesis assumes there will be a difference between the PDA open price bid time (BT) average and the all items average at d&gt;3 (greater than 3 days).

### Significance Level (sig_lev) is set to 5%.

In [None]:
# Significance level for the hypothesis tests (5%).
sig_lev = 0.05

In [None]:
# Compare the PDA final opening-bid mean with the all-items final mean.
# The second line previously printed the PDA value under the all-items label.
print(f'Final PDA Bid mean:: {PDA_OB_bt_mean_d7}')
print(f'Final Bid mean: {OB_bt_mean_d7}')

In [None]:
# NOTE(review): repeats the sig_lev assignment made two cells earlier.
sig_lev = 0.05

In [None]:
# Test the PDA opening bids from the d7 subset (bid time > 3) against the
# all-items opening bids from the same window; default is two-tailed.
atest = p_value(PDA_OB_bt_d7, OB_bt_d7)
atest

In [None]:
# Half of a hard-coded figure.
# NOTE(review): the literal looks pasted from an earlier test output
# (presumably a z-score) — confirm its source and derive it from the result
# dict instead, so it stays in sync when the data changes.
hyp = -0.1383410858165881/2
hyp 

-0.06917054290829405 for a one-tailed distribution, with a p-value of more than 0.0001, meaning we would expect to see a result like this in 85% of cases where open and close prices increase over time, assuming the alternative hypothesis.

In [None]:
# Write the narrative findings and headline statistics to a plain-text report.
# Every entry ends with a newline so consecutive entries do not run together.
# The `with` block closes the file on exit, so no explicit close() is needed.
with open('./ebay_eda.txt', 'w') as f:
    f.write(' For each item, what is the relationship between bids, bid time, and the closing price? Does this differ by length of the auction, opening bid, or by bidder rating?\n')
    f.write(f'The count for each item in the dataset: {count_type_item.sum(axis=0)}\n')
    f.write(' It would be hard to compete with so many sellers. However in the long run 7 day auctions would boost the price and the amount of buyers so it would better to hold till the 7th day. The PDA seems to get the most buyers overall.\n')
    f.write('The bidders with the best ratings, bid on day 3, notthing like a gurantee sale.\n')
    f.write(f"Initial Bid mean: {OB_bt_mean_d3}\n")
    # Final mean is the d7 statistic (the d3 value was written here before).
    f.write(f"Final Bid mean: {OB_bt_mean_d7}\n")
    f.write(f"Delta Bid mean: {OB_bt_mean_d7 - OB_bt_mean_d3}\n")
    f.write(f"Initial PDA Bid mean: {PDA_OB_bt_mean_d3}\n")
    f.write(f"Final PDA Bid mean: {PDA_OB_bt_mean_d7}\n")
    f.write(f"Delta PDA Bid mean: {PDA_OB_bt_mean_d7 - PDA_OB_bt_mean_d3}\n")
    f.write(f"{atest} has a value of {hyp} for 1 talied distribulion for a p-value of more than 0.0001, meaning we would expect to see a result like this in 85% of all open close prices increase over time assuming the alternative hypothesis.\n")
    # NOTE(review): a p-value above the significance level normally means
    # failing to reject the null hypothesis — confirm the intended conclusion.
    f.write(f"Z-score is low and p-value are extremely high, well above the 5% significance level. We reject the Null Hypothesis that price does not change when auction goes on beyon 3 days.\n")
    f.write(f"The bid open and close change over time, over versus the PDA has a {htest}\n")
    f.write('Which types of auctions start with the smallest opening bid?\n')
    f.write('The 7 day auction starts off with smallest open bid but ends up with the higest closing price.\n')
    f.write('Which types of auctions are more common and more profitable?\n')
    f.write('The more days you have to auction in item, the more likely you are to achieve a profit.\n')
    f.write('Should I start an auction with a higher or lower opening bid (to attract more bidders)?\n')
    f.write('Lower bids gurantee more bidders which increase over time, unless someone over bids to early\n')