In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import mplcursors
import seaborn as sns
import statsmodels.api as sm

In [10]:
def scatterplot_with_hovering_annaotation(df, x_col_name, y_col_name, anna_arr, title):
    """Draw a scatter plot whose points show a text label while hovered.

    Parameters
    ----------
    df : object indexable by column name (e.g. a pandas DataFrame)
        Source of the plotted values.
    x_col_name, y_col_name : str
        Columns plotted on the x and y axes; also used as the axis labels.
    anna_arr : iterable
        One label per plotted point, in the same order as the rows of ``df``.
        Labels are matched to points by position and converted with ``str``.
    title : str
        Title of the axes.

    Returns
    -------
    None
        The figure is created through pyplot and the hover callback is
        registered on its canvas; mouse events are only delivered when an
        interactive matplotlib backend is active.
    """
    fig, ax = plt.subplots(figsize=(10, 8))
    ax.set_title(title)
    ax.set_xlabel(x_col_name)
    ax.set_ylabel(y_col_name)

    # Draw on the axes created above instead of relying on pyplot's notion of
    # the "current" axes.
    sc = ax.scatter(df[x_col_name], df[y_col_name], s=40, alpha=0.5)

    # The indices reported by `sc.contains` are positions into the plotted
    # points, so materialise the labels as a plain list: this keeps the lookup
    # positional even when `anna_arr` is a Series with a non-default index,
    # and `str` keeps non-string labels from breaking the join below.
    labels = [str(label) for label in anna_arr]

    annot = ax.annotate("", xy=(0, 0), xytext=(20, 20), textcoords="offset points",
                        bbox=dict(boxstyle="round", fc="w", alpha=0.4),
                        arrowprops=dict(arrowstyle="->", color="black"))
    annot.set_visible(False)

    def update_annot(ind):
        """Anchor the annotation at the first hit point and list every hit."""
        hit = ind["ind"]
        annot.xy = sc.get_offsets()[hit[0]]
        text = "{}, {}".format(" ".join(str(n) for n in hit),
                               " ".join(labels[n] for n in hit))
        annot.set_text(text)

    def hover(event):
        """Show the annotation over a point; hide it everywhere else."""
        if event.inaxes == ax:
            cont, ind = sc.contains(event)
            if cont:
                update_annot(ind)
                annot.set_visible(True)
                fig.canvas.draw_idle()
                return
        # Reached when the pointer is off every point, including when it has
        # left the axes altogether, so a stale tooltip is never left behind.
        if annot.get_visible():
            annot.set_visible(False)
            fig.canvas.draw_idle()

    fig.canvas.mpl_connect("motion_notify_event", hover)

In [3]:
# Load the table used by the regression and the scatter plot below; the path
# is resolved relative to the current working directory.
plot_df = pd.read_csv("./test.csv")

In [4]:
# Linear regression (OLS): log property tax per capita explained by the
# "pct_non-Hispanic_White_2009" column.

# Response vector.
y = plot_df["log_property_tax_per_capita"].values

# Design matrix: the single predictor with a constant column added for the
# intercept.
x = plot_df["pct_non-Hispanic_White_2009"].values
x = sm.add_constant(x)

model = sm.OLS(y, x)
results = model.fit()

print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.023
Model:                            OLS   Adj. R-squared:                  0.016
Method:                 Least Squares   F-statistic:                     3.229
Date:                Tue, 26 Apr 2022   Prob (F-statistic):             0.0745
Time:                        17:04:35   Log-Likelihood:                -105.59
No. Observations:                 140   AIC:                             215.2
Df Residuals:                     138   BIC:                             221.1
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          6.1220      0.096     63.976      0.0

In [5]:
# Fitted coefficients in design-matrix column order: the constant (intercept)
# first, then the slope of the predictor.
results.params

array([6.12196265, 0.36165063])

In [11]:
%matplotlib notebook

scatterplot_with_hovering_annaotation(
    plot_df,
    "pct_non-Hispanic_White_2009",
    "log_property_tax_per_capita",
    plot_df["Name"],
    "log prop tax per capita vs non-hispanic white pct in 2009" 
)
plt.plot([0, 1], [results.params[0], results.params[0] + results.params[1]], 'k-', color = 'r')

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x7fc5c07298d0>]