In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pandas import Series, DataFrame
from pylab import rcParams
import re
import seaborn as sb

In [None]:
%matplotlib inline
rcParams['figure.figsize'] = 10, 10
sb.set_style('whitegrid')

In [None]:
# Input CSV location and the column labels applied to the loaded frame.
DATA_FILE = 'C:/puerto_rico/data_file.csv'
COLUMN_NAMES = [
    'Name',
    'Types',
    'Rating',
    'Number of Ratings',
    'Address',
    'Latitude',
    'Longitude',
]

# Read the file, then overwrite whatever header it carried with our own labels.
results = pd.read_csv(DATA_FILE)
results.columns = COLUMN_NAMES

In [None]:
# The literal string 'None' marks a missing value in the file; swap it for 0
# (this replacement applies to every column of the frame).
results = results.replace('None', 0)

# Convert the two numeric columns so they can be compared and summed.
for numeric_column in ('Rating', 'Number of Ratings'):
    results[numeric_column] = pd.to_numeric(results[numeric_column])

In [None]:
# Keep only the highly rated places (rating of 4.5 or above).
# .copy() makes this an independent frame: later cells add columns to it, and
# doing that on a filtered slice of `results` triggers SettingWithCopyWarning.
better_results = results.loc[results['Rating'] >= 4.5].copy()

In [None]:
# Histogram of the ratings that passed the >= 4.5 filter.
better_results['Rating'].plot.hist()

In [None]:
# Total number of ratings across all highly rated places.
# Series.sum() is vectorised and skips missing values, whereas the builtin
# sum() iterates element by element and turns the whole total into NaN if a
# single count is missing.
total_better_reviews = better_results['Number of Ratings'].sum()

In [None]:
# Share of all ratings that each place received, rounded to 5 decimal places.
rating_share = better_results['Number of Ratings'] / total_better_reviews
ratio_total = rating_share.round(5)

In [None]:
# Donut chart with one unlabeled wedge per highly rated place, sized by its
# number of ratings.
color_theme = ['#F6F5F6', '#92AECE', '#8686A4', '#99849A', '#485260']
rating_counts = better_results['Number of Ratings']
plt.pie(rating_counts, colors=color_theme, startangle=90, pctdistance=0.85)

# A white disc drawn over the centre of the pie creates the donut hole.
fig = plt.gcf()
centre_circle = plt.Circle((0, 0), 0.70, fc='white')
fig.gca().add_artist(centre_circle)

plt.tight_layout()
plt.show()

In [None]:
# The five highly rated places with the largest number of ratings.
best_results = better_results.nlargest(5, 'Number of Ratings')

color_theme = ['#F6F5F6', '#92AECE', '#8686A4', '#99849A', '#485260']

# Labelled donut: each wedge is one place, annotated with its percentage share.
pie_options = dict(
    labels=best_results['Name'],
    autopct='%1.1f%%',
    startangle=90,
    pctdistance=0.85,
    colors=color_theme,
)
plt.pie(best_results['Number of Ratings'], **pie_options)

# White disc over the centre turns the pie into a donut.
fig = plt.gcf()
centre_circle = plt.Circle((0, 0), 0.70, fc='white')
fig.gca().add_artist(centre_circle)

plt.tight_layout()
plt.show()

In [None]:
# Nested donut: one ring per top place. In each ring the orange wedge is that
# place's number of ratings and the light wedge is the rest of the five
# places' combined total.
total_ratings = sum(best_results['Number of Ratings'])

# The first row gets the outermost ring; each following ring is 0.3 smaller.
ring_radius = 0.7 + (0.3 * (len(best_results) - 1))
ring_style = {"edgecolor": "white", 'linewidth': 1}

for place_name, place_ratings in zip(best_results['Name'], best_results['Number of Ratings']):
    # Name label sits just above the top edge of the ring it belongs to.
    plt.text(0.01, ring_radius + 0.07, place_name, ha='center', va='center')
    plt.pie(
        [total_ratings - place_ratings, place_ratings],
        radius=ring_radius,
        startangle=90,
        colors=['#F6F5F6', '#E6742F'],
        wedgeprops=ring_style,
    )
    ring_radius -= 0.3

# White disc in the middle hides the centre of the innermost ring.
plt.gca().add_artist(plt.Circle(xy=(0, 0), radius=0.35, facecolor='white'))
plt.tight_layout()
plt.show()

In [None]:
# Weighted score = rating multiplied by the place's share of all ratings
# (ratio_total), rounded to 5 decimal places.
# assign() returns a new frame instead of writing a column into a filtered
# slice, which avoids pandas' SettingWithCopyWarning and keeps the cell
# safe to re-run.
better_results = better_results.assign(
    weighted_score=(better_results['Rating'] * ratio_total).round(5)
)

In [None]:
# The five places with the highest weighted score.
best_weighted_results = better_results.nlargest(n=5, columns='weighted_score')

In [None]:
# Histogram of the weighted scores. The scores live in a column of
# better_results; a bare `weighted_score` variable is never defined in this
# notebook, so referencing it raised NameError on a fresh kernel.
better_results['weighted_score'].plot(kind='hist')

In [None]:
# Donut chart of the top five places by weighted score.
color_theme = [
    '#F6F5F6',
    '#92AECE',
    '#8686A4',
    '#99849A',
    '#485260'
]
plt.pie(
    best_weighted_results['weighted_score'],
    # Labels must come from the same frame as the wedge sizes; taking them
    # from best_results (ranked by rating count) can attach a name to a wedge
    # that belongs to a different place.
    labels=best_weighted_results['Name'],
    autopct='%1.1f%%',
    startangle=90,
    pctdistance=0.85,
    colors=color_theme
)

# White disc over the centre turns the pie into a donut.
centre_circle = plt.Circle(
    (0, 0),
    0.70,
    fc='white'
)

fig = plt.gcf()

fig.gca().add_artist(centre_circle)
plt.tight_layout()
plt.show()

In [None]:
# Nested donut for the weighted ranking: one ring per top weighted place, with
# the dark wedge showing that place's weighted score and the light wedge the
# rest of the five places' combined weighted score.
# NOTE(review): this cell used to re-plot best_results by raw rating count,
# an exact copy of the earlier nested donut. It now follows the weighted pie
# chart directly above it -- confirm that this was the intent.
total_weighted_score = best_weighted_results['weighted_score'].sum()

# The first row gets the outermost ring; each following ring is 0.3 smaller.
startingRadius = 0.7 + (0.3 * (len(best_weighted_results) - 1))

for index, row in best_weighted_results.iterrows():
    place_score = row['weighted_score']
    remainingPie = total_weighted_score - place_score
    donut_sizes = [remainingPie, place_score]

    # Name label sits just above the top edge of the ring it belongs to.
    plt.text(0.01, startingRadius + 0.07, row['Name'], horizontalalignment='center', verticalalignment='center')
    plt.pie(donut_sizes, radius=startingRadius, startangle=90, colors=['#F6F5F6', '#879096'],
            wedgeprops={"edgecolor": "white", 'linewidth': 1})

    startingRadius -= 0.3

# White disc in the middle hides the centre of the innermost ring.
circle = plt.Circle(xy=(0, 0), radius=0.35, facecolor='white')
plt.gca().add_artist(circle)
plt.tight_layout()
plt.show()

In [None]:
# Z-score of the rating counts: (value - mean) / standard deviation.
# assign() returns a new frame instead of writing a column into a filtered
# slice, which avoids pandas' SettingWithCopyWarning.
rating_counts = better_results['Number of Ratings']
better_results = better_results.assign(
    standarized_number_of_ratings=(rating_counts - rating_counts.mean()) / rating_counts.std()
)

In [None]:
# Z-score of the ratings: (value - mean) / standard deviation.
# assign() returns a new frame instead of writing a column into a filtered
# slice, which avoids pandas' SettingWithCopyWarning.
rating_values = better_results['Rating']
better_results = better_results.assign(
    standarized_ratings=(rating_values - rating_values.mean()) / rating_values.std()
)

In [None]:
# Histogram of the standardized rating counts.
better_results['standarized_number_of_ratings'].plot.hist()

In [None]:
# Histogram of the standardized ratings.
better_results['standarized_ratings'].plot.hist()

In [None]:
# Preview the first five rows, including the columns derived above.
better_results.head(n=5)

In [None]:
# Density histogram of the standardized ratings with a KDE curve on top.
# distplot() is deprecated in seaborn (0.11 and later) and slated for removal;
# histplot() with these arguments draws the equivalent figure.
sb.histplot(
    better_results['standarized_ratings'],
    kde=True,
    stat='density',
    kde_kws={'cut': 3},
)

In [None]:
# Number of distinct place names at each rating value (indexed by rating).
names_by_rating = better_results.groupby('Rating')['Name']
ratings = names_by_rating.nunique()
ratings

In [None]:
# Distinct rating values, in the order they first appear in the frame.
values = better_results['Rating'].unique()
values

In [None]:
# Bar chart of how many places have each rating, ordered by rating value.
# Uses the Series.value_counts() method; the top-level pd.value_counts()
# function is deprecated in recent pandas releases.
better_results['Rating'].value_counts().sort_index().plot.bar()

In [None]:
# Donut chart of how the highly rated places split across rating values.
# The sixth entry used to be the placeholder '#', which is not a valid colour
# and makes matplotlib raise as soon as a sixth wedge is drawn.
color_theme = [
    '#F6F5F6',
    '#92AECE',
    '#8686A4',
    '#99849A',
    '#485260',
    '#CDC8C9'
]
plt.pie(
    ratings,
    # `ratings` is indexed by rating value (sorted by groupby), so its own
    # index is the only label source guaranteed to line up with the wedges;
    # Rating.unique() returns values in order of first appearance instead.
    labels=ratings.index,
    autopct='%1.1f%%',
    startangle=90,
    pctdistance=0.85,
    colors=color_theme
)

# White disc over the centre turns the pie into a donut.
centre_circle = plt.Circle(
    (0, 0),
    0.70,
    fc='white'
)

fig = plt.gcf()

fig.gca().add_artist(centre_circle)
plt.tight_layout()
plt.show()

In [None]:
# Flatten the comma-separated 'Types' strings into one Series holding a single
# type per entry.
# - Non-string cells are skipped: a missing value (NaN) or the integer 0 left
#   behind by the frame-wide replace('None', 0) has no .split() and would
#   raise AttributeError.
# - Surrounding whitespace is stripped and empty fragments are dropped, so
#   "bar" and " bar" count as the same type.
all_types = Series([
    sub_type.strip()
    for t in better_results['Types']
    if isinstance(t, str)
    for sub_type in t.split(',')
    if sub_type.strip()
])
all_types

In [None]:
# Every distinct place type, in order of first appearance.
distinct_types = pd.unique(all_types)
distinct_types

In [None]:
# Occurrences of each place type, most frequent first.
distinct_types_count = all_types.value_counts(sort=True, ascending=False)
distinct_types_count

In [None]:
# Donut chart of how often each place type occurs among the highly rated places.
# The last colour used to repeat '#8F7172'; with a legend as the only key, two
# wedges sharing one colour cannot be told apart.
color_theme = [
    '#F6F5F6',
    '#92AECE',
    '#8686A4',
    '#99849A',
    '#485260',
    '#CDC8C9',
    '#8F7172',
    '#BEA88A',
    '#A98C6E',
    '#6E7F80'
]
plt.pie(
    distinct_types_count,
    autopct='%1.1f%%',
    startangle=90,
    pctdistance=0.85,
    colors=color_theme
)

# White disc over the centre turns the pie into a donut.
centre_circle = plt.Circle(
    (0, 0),
    0.70,
    fc='white'
)

fig = plt.gcf()

fig.gca().add_artist(centre_circle)
# Legend entries must follow the wedge order, i.e. the index of the counts
# being plotted (most frequent first). `distinct_types` is in order of first
# appearance and would pair type names with the wrong wedges.
plt.legend(distinct_types_count.index, loc='center left')
plt.tight_layout()
plt.show()

In [None]:
# Bar chart of place-type frequencies, most frequent first.
# Uses the Series.value_counts() method; the top-level pd.value_counts()
# function is deprecated in recent pandas releases.
all_types.value_counts().plot.bar()