In [None]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [None]:
# Integers 0-9 and their squares, used by the first demo plots.
x = list(range(10))
y = [value ** 2 for value in x]

In [None]:
# Display the integer list.
x

In [None]:
# Display the squared values.
y

In [None]:
# Quick line plot of y against x; the trailing ';' hides the text repr of the returned lines.
plt.plot(x, y);

In [None]:
# Cubes of the same integers, for comparison with the squares.
z = [value ** 3 for value in x]

In [None]:
fig, ax = plt.subplots(figsize=(10, 10))

# Draw both power curves first, then decorate the same Axes.
ax.plot(x, y, label="x-squared")
ax.plot(x, z, label="x-cubed")

ax.set_title("Exponentiated Integers", fontsize=20)
ax.set_xlabel("Integer Value", fontsize=16)
ax.set_ylabel("Exponent Value", fontsize=16)
ax.legend(fontsize=14)


In [None]:
# Load the diamonds data from a CSV file located relative to the working directory.
diamonds = pd.read_csv("Diamonds Prices2022.csv")

In [None]:
# Preview the first rows of the diamonds frame.
diamonds.head()

In [None]:
# Mean price per cut, most expensive cut first.
diamond_average_df = (
    diamonds
    .groupby("cut")[["price"]]
    .mean()
    .sort_values("price", ascending=False)
)

# Overall mean price across every row, used as a reference line in the charts.
diamond_average = diamonds["price"].mean()

In [None]:
# Horizontal reference line at the overall mean, spanning the full axes width,
# followed by one vertical bar per cut.
plt.axhline(y=diamond_average, xmin=0, xmax=1, color="red", label="mean")
plt.bar(x=diamond_average_df.index, height=diamond_average_df["price"])

In [None]:
# Same chart rotated: dashed vertical reference line at the overall mean,
# followed by one horizontal bar per cut.
plt.axvline(x=diamond_average, ymin=0, ymax=1, color="red", label="mean", linestyle="--")
plt.barh(y=diamond_average_df.index, width=diamond_average_df["price"])

In [None]:
# Mean carat for every cut/clarity combination (rows: cut, columns: clarity).
# `values` takes a column label and the aggregation goes in `aggfunc`; with a
# scalar `values` the columns stay single-level, so no droplevel is needed.
diamond_pivot = diamonds.pivot_table(
    index="cut",
    columns="clarity",
    values="carat",
    aggfunc="mean",
)

diamond_pivot

In [None]:
# Heat map of the cut-by-clarity pivot (seaborn is imported with the other
# libraries in the notebook's first cell).
sns.heatmap(diamond_pivot)

In [None]:
# Read the tab-separated housing file, keeping only the columns used below.
# The two period columns are parsed as dates; total_homes_sold uses the
# nullable Int32 dtype.
housing_raw = pd.read_csv(
    "housing_prices.tsv000",
    sep="\t",
    usecols=[
        "period_begin",
        "period_end",
        "region_type",
        "region_name",
        "duration",
        "total_homes_sold",
        "median_active_list_price",
    ],
    parse_dates=["period_begin", "period_end"],
    dtype={"total_homes_sold": "Int32"},
)

In [None]:
# Row counts per region for names containing "CA".
(housing_raw["region_name"]
 .loc[lambda names: names.str.contains("CA")]
 .value_counts())

In [None]:
# Column dtypes and non-null counts, with memory usage measured deeply.
housing_raw.info(memory_usage="deep")

In [None]:
# Row counts per region name across the whole raw frame.
housing_raw["region_name"].value_counts()

In [None]:
# Row counts per region for names containing "Francisco".
(housing_raw["region_name"]
 .loc[lambda names: names.str.contains("Francisco")]
 .value_counts())

In [None]:
# County-level rows with a 4-week duration whose region name contains "CA".
# .copy() makes `housing` an independent frame, so later column assignments
# neither warn (SettingWithCopyWarning) nor depend on housing_raw.
housing = housing_raw.loc[
    housing_raw["region_name"].str.contains("CA")
    & (housing_raw["duration"] == "4 weeks")
    & (housing_raw["region_type"] == "county")
].copy()

In [None]:
# Preview the filtered frame.
housing.head()

In [None]:
# Shorten county names for plotting, e.g. "Los Angeles County, CA" -> "Los Angeles".
# Removing the literal suffix (instead of slicing off the last 11 characters)
# keeps the cell idempotent: re-running it leaves already-shortened names alone.
# .assign builds a new frame rather than writing into a slice of housing_raw.
housing = housing.assign(
    region_name=housing["region_name"].str.replace(r" County, CA$", "", regex=True)
)

housing.head()

In [None]:
# Ten regions with the highest mean of median_active_list_price, rounded.
top10 = (
    housing
    .groupby("region_name")[["median_active_list_price"]]
    .mean()
    .sort_values("median_active_list_price", ascending=False)
    .round()
    .head(10)
)

In [None]:
# Display the top-10 table.
top10

In [None]:
# Unstyled first pass: one bar per region in top10.
fig, ax = plt.subplots(figsize=(10, 10))
ax.bar(x=top10.index, height=top10["median_active_list_price"])
plt.show()

In [None]:
# Styled version of the top-10 bar chart: title, axis labels and rotated ticks.
fig, ax = plt.subplots(figsize=(10, 10))

# Global matplotlib setting: stays in effect for every figure drawn after this cell.
plt.rcParams["font.size"] = 14

# Plain (non-scientific) tick numbers.
# NOTE(review): called before ax.bar, i.e. before the string index is plotted on x — presumably the order matters; confirm before moving it.
ax.ticklabel_format(style='plain')

ax.bar(top10.index, top10["median_active_list_price"])

ax.set_title("Top 10 California Counties by Home Price")

ax.set_ylabel("Price", fontsize=14)
ax.set_xlabel("County", fontsize=14)

# Rotate the x tick labels on the current axes.
plt.xticks(rotation=35, wrap=False, fontsize=12)

plt.show()

In [None]:
# Wide table: one row per period_begin, one column per region, cells holding
# the mean median_active_list_price.
housing_pivot = housing.pivot_table(
    values="median_active_list_price",
    index="period_begin",
    columns="region_name",
    aggfunc="mean",
)

housing_pivot

In [None]:
# Mean list price over rows that start before 2020 and whose region name
# contains "Francisco".
sf_pre_covid_mean = (
    housing
    .loc[lambda df: (df["period_begin"].dt.year < 2020)
                    & df["region_name"].str.contains("Francisco"),
         "median_active_list_price"]
    .mean()
)

sf_pre_covid_mean

In [None]:
# Median list price over time for three regions, with the start of Covid marked.
#
# Changes from the earlier version of this cell:
# * columns are looked up by the shortened region names ("Los Angeles", ...)
#   that housing_pivot carries once region_name has been trimmed;
# * dates are written as timestamps instead of raw day counts since 1970-01-01
#   (18341 -> 2020-03-20, 18300 -> 2020-02-08, 17600 -> 2018-03-10);
# * a leftover annotation placed at data coordinates (.5, .5) was removed;
# * the stray ';' on its own line (a syntax error) now ends the last statement.
covid_start = pd.Timestamp("2020-03-20")

fig, ax = plt.subplots()

fig.suptitle("Impact of Covid Differs Across California Cities", fontsize=16)

# Must run before the date data is plotted, while both axes still use the
# default scalar formatter.
ax.ticklabel_format(style='plain')

for region, label in [
    ("Los Angeles", "LA Median Price"),
    ("San Diego", "SD Median Price"),
    ("San Francisco", "SF Median Price"),
]:
    ax.plot(housing_pivot.index, housing_pivot[region], label=label)

ax.axvline(covid_start, c="r", ls="--")

ax.annotate("Start of Covid",
            xy=(pd.Timestamp("2020-02-08"), 1400000),
            xytext=(pd.Timestamp("2018-03-10"), 1200000),
            arrowprops=dict(facecolor="black",
                            width=1,
                            headwidth=8,
                            connectionstyle="angle3, angleA=270, angleB=0"),
            verticalalignment="center"
           )

# Trailing ';' suppresses the Legend repr in the cell output.
ax.legend(bbox_to_anchor=(1, 1));

In [None]:
# Elapsed time between 1970-01-01 and 2020-03-31 (scratch calculation for
# positioning dates on a numeric axis).
pd.Timestamp("2020-03-31") - pd.Timestamp("1970-01-01")

In [None]:
# Rows for Los Angeles and San Francisco from January-March 2017, oldest first.
# region_name holds the shortened names at this point, so the full
# "... County, CA" spellings would match nothing. The whole filter is evaluated
# on the frame being filtered (via the lambda) instead of relying on index
# alignment with a mask built from a different frame.
reduced = (
    housing
    .loc[lambda df: df["region_name"].isin(["Los Angeles", "San Francisco"])
                    & (df["period_begin"].dt.year == 2017)
                    & (df["period_begin"].dt.month < 4)]
    .sort_values("period_begin")
)

In [None]:
# Mean list price per region within the reduced sample.
mean_prices = (
    reduced
    .groupby("region_name")[["median_active_list_price"]]
    .mean()
)

In [None]:
# Display the per-region means.
mean_prices

In [None]:
# Bar chart of the per-region mean prices with a figure-level legend.
fig, ax = plt.subplots()

# Plain (non-scientific) tick numbers; called before the string index is plotted on x.
ax.ticklabel_format(style='plain')

ax.bar(mean_prices.index, mean_prices["median_active_list_price"], label="Mean Prices 2017")
# Legend is attached to the figure rather than the axes.
fig.legend(loc="upper right")

In [None]:
# All regions for the 4-week period beginning 2022-03-28.
april_22 = housing_raw.loc[
    lambda df: (df["period_begin"] == '2022-03-28') & (df["duration"] == '4 weeks')
]

In [None]:
# Density-normalised histogram (20 bins) of list prices in the april_22 sample.
fig, ax = plt.subplots()
ax.hist(x=april_22["median_active_list_price"], bins=20, density=True)

In [None]:
# Inspect the first 30 rows of the april_22 sample.
april_22.head(30)

In [None]:
# Preview the raw housing frame.
housing_raw.head()

In [None]:
# Re-read the full file and derive a two-letter `state` column in one chain.
# The whole np.where call lives inside the lambda so that `df` is the frame
# being assigned to; previously np.where received a function as its condition
# and `x` resolved to the notebook's global list, which raised a TypeError.
# County names take their last two characters; every other region type takes
# characters [-13:-11], the same slice used where `state` is added to
# housing_raw (presumably the state sits before an 11-character trailing
# suffix — confirm against the data).
(pd.read_csv("housing_prices.tsv000",
             sep="\t",
             parse_dates=["period_begin", "period_end"],
             dtype={"total_homes_sold": "Int32"})
 .assign(state=lambda df: np.where(df["region_type"] == "county",
                                   df["region_name"].str[-2:],
                                   df["region_name"].str[-13:-11])))

In [None]:
# Add a `state` column: last two characters of the name for county rows,
# characters [-13:-11] for every other region type.
housing_raw = housing_raw.assign(
    state=lambda df: np.where(
        df["region_type"] == "county",
        df["region_name"].str[-2:],
        df["region_name"].str[-13:-11],
    )
)

In [None]:
# Bar chart of each state's share of rows (value_counts normalised to fractions).
housing_raw["state"].value_counts(normalize=True).plot.bar(figsize=(8, 8))

In [None]:
# Column totals for Salt Lake County. numeric_only=True restricts the sum to
# numeric columns: datetime columns cannot be summed, and summing the string
# columns would concatenate every value into one long string.
housing_raw.loc[housing_raw["region_name"] == "Salt Lake County, UT"].sum(numeric_only=True)

In [None]:
# Mean list price per state for the 4-week period beginning 2022-03-28.
state_mean = (
    housing_raw
    .loc[lambda df: (df["period_begin"] == '2022-03-28') & (df["duration"] == '4 weeks')]
    .groupby("state")[["median_active_list_price"]]
    .mean()
)

In [None]:
# Box plot of state-level mean prices with Massachusetts and Hawaii called out.
fig, ax = plt.subplots()

# Select the scalar price; .loc["MA"] alone returns a one-element Series,
# which is not a valid plot coordinate.
mass = state_mean.loc["MA", "median_active_list_price"]
hi = state_mean.loc["HI", "median_active_list_price"]

# The single box is drawn at x=1, so both arrows point there.
ax.annotate("Massachusetts",
            xy=(1, mass),
            xytext=(.6, mass-2),
            arrowprops=dict(arrowstyle="->",
                        connectionstyle="arc3, rad=.15")
           )

ax.annotate("Hawaii",
            xy=(1, hi),
            xytext=(.6, hi-1.2),
            arrowprops=dict(arrowstyle="->",
                        connectionstyle="arc3, rad=.15")
           )

# Pass the price column as a 1-D Series and drop missing means, which would
# otherwise turn the box statistics into NaN.
ax.boxplot(state_mean["median_active_list_price"].dropna())

In [None]:
# Display the states ordered from highest to lowest mean price (state_mean itself is not modified).
state_mean.sort_values("median_active_list_price", ascending=False)

In [None]:
# Mean list price per county for the 4-week period beginning 2022-03-28.
county_mean = (
    housing_raw
    .loc[lambda df: (df["period_begin"] == '2022-03-28')
                    & (df["duration"] == '4 weeks')
                    & (df["region_type"] == "county")]
    .groupby("region_name")[["median_active_list_price"]]
    .mean()
)

In [None]:
# Display the counties ordered from highest to lowest mean price.
county_mean.sort_values("median_active_list_price", ascending=False)

In [None]:
# Box plot of the county-level mean prices.
county_mean.plot.box()

In [None]:
# Look up the mean price row for Salt Lake County.
county_mean.loc["Salt Lake County, UT"]

In [None]:
# Box plot of state-level mean prices with Massachusetts and Hawaii called out.
fig, ax = plt.subplots()

# Scalar prices; .loc["MA"] alone returns a one-element Series, which is not a
# valid plot coordinate.
mass = state_mean.loc["MA", "median_active_list_price"]
hi = state_mean.loc["HI", "median_active_list_price"]

# (label, arrow tip y, label y); the single box is drawn at x=1.
for label, tip_y, text_y in [
    ("Massachusetts", mass, mass - 2),
    ("Hawaii", hi, hi - 1.2),
]:
    ax.annotate(label,
                xy=(1, tip_y),
                xytext=(.6, text_y),
                arrowprops=dict(arrowstyle="->",
                                connectionstyle="arc3, rad=.15"))

# Pass the price column as a 1-D Series and drop missing means, which would
# otherwise turn the box statistics into NaN.
ax.boxplot(state_mean["median_active_list_price"].dropna())

In [None]:
# Order states from highest to lowest mean price. Rebinding the name instead of
# using inplace=True keeps the step chainable and avoids mutating a frame that
# earlier cells have already displayed.
state_mean = state_mean.sort_values("median_active_list_price", ascending=False)

In [None]:
# Bar chart of mean price per state, with Massachusetts and Hawaii highlighted
# in red and labelled.
fig, ax = plt.subplots(figsize=(12, 12))

highlighted = {"MA": "Massachusetts", "HI": "Hawaii"}

ax.bar(x=state_mean.index,
       height=state_mean["median_active_list_price"],
       color=["red" if state in highlighted else "grey" for state in state_mean.index]
      )

# Anchor each arrow on the state's own bar: x is the state's category label and
# y its scalar price. The label is placed at a fixed offset in points so it
# stays next to the bar whatever the price scale is. (Both arrows previously
# pointed at x=1, wherever MA and HI actually sat on the axis.)
for abbr, name in highlighted.items():
    ax.annotate(name,
                xy=(abbr, state_mean.loc[abbr, "median_active_list_price"]),
                xytext=(40, 20),
                textcoords="offset points",
                arrowprops=dict(arrowstyle="->",
                                connectionstyle="arc3, rad=.15")
               )

# TODO: color only above x or y


In [None]:
# 0-9 with the even numbers negated.
data = [-value if value % 2 == 0 else value for value in range(10)]

In [None]:
# 0-9 with the odd numbers negated (mirror image of `data`).
data2 = [value if value % 2 == 0 else -value for value in range(10)]

In [None]:
# Two horizontal bar series drawn on the same axes at positions 0-9; the sign of each value sets the bar's direction.
plt.barh(y=range(10),width=data)
plt.barh(y=range(10),width=data2)


In [None]:
# Line plot of `data` against its list positions.
plt.plot(data)

In [None]:
# pairplot needs a DataFrame; calling it with no arguments raises a TypeError.
# NOTE(review): the diamonds frame is assumed to be the intended data — confirm.
# A fixed-seed sample of at most 1000 rows keeps the grid quick to draw and
# reproducible.
sns.pairplot(diamonds.sample(n=min(len(diamonds), 1000), random_state=0));