In [9]:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

import statsmodels.api as sm

# Render matplotlib figures inline in the notebook output.
%matplotlib inline                 
# Emit inline figures as SVG.
%config InlineBackend.figure_format = 'svg'


sns.set(style="white") # Global plot parameters: seaborn "white" style
# Applied after sns.set, so this matplotlib style sheet is the one set last.
plt.style.use("seaborn-v0_8-white")

## 12) Simple linear regression without an intercept

### a) The coefficient estimates of the regression of $y$ onto $x$ and of $x$ onto $y$ (both without an intercept) are the same in two cases. Looking at the coefficient estimates

### $$ \hat{\beta}_x = \frac{\sum x_i y_i}{\sum x_i^2},\quad \hat{\beta}_y = \frac{\sum x_i y_i}{\sum y_i^2}$$

### The two estimates are equal when the sums of squared values of x and y are equal, $ \sum x_i^2 = \sum y_i^2$. This is of course realized when $x_i = y_i$ for each $i$ (trivial), or when the values are shuffled in a way that satisfies $ \sum x_i^2 = \sum y_i^2$ non-trivially. Note that in the former case the true relationship is linear without an error term, so the least-squares fit has a unit $R^2$ score:

### $$ R^2  = \frac{(\sum x_i y_i)^2}{\sum x_i^2 \sum y_i^2}\quad \xrightarrow{x_i = y_i}\quad 1$$

### We focus on the non-trivial case in part c), for which we will have $R^2 < 1$.

### b) An example where the two coefficient estimates differ: $y$ is generated as $x$ plus random noise

In [10]:
# Seed the global NumPy RNG so every draw below is reproducible.
np.random.seed(1)

# Predictor: 100 random integers in [0, 100).
x = np.random.randint(100, size=100)

# Response: x plus standard-normal noise, so y differs from x in the sample
# and, in general, sum(x_i^2) != sum(y_i^2).
y = x + np.random.normal(size=100)

# No-intercept least-squares slopes in both directions (y onto x, x onto y).
# Both are stored as plain scalars: a trailing comma after the first
# assignment would silently wrap it in a 1-tuple.
slope_yx = sm.OLS(y, x, hasconst=False).fit().params[0]
slope_xy = sm.OLS(x, y, hasconst=False).fit().params[0]

In [11]:
# Show the y-onto-x and x-onto-y slope estimates side by side.
(slope_yx, slope_xy)

((0.9990208271060206,), 1.0006651728976468)

### c) An example where the two coefficient estimates are the same: the target is generated as a random permutation of the x values above, so that $\sum x_i^2 = \sum y_i^2$

In [12]:
# Alternative response: a random reordering of x. A permutation keeps the
# same multiset of values, hence sum(yalt_i^2) == sum(x_i^2) while
# yalt_i != x_i for most i.
yalt = np.random.permutation(x)

# No-intercept least-squares slopes in both directions (yalt onto x, x onto
# yalt). Both are stored as plain scalars: a trailing comma after the first
# assignment would silently wrap it in a 1-tuple.
slope_yxalt = sm.OLS(yalt, x, hasconst=False).fit().params[0]
slope_xyalt = sm.OLS(x, yalt, hasconst=False).fit().params[0]

In [13]:
# Show the yalt-onto-x and x-onto-yalt slope estimates side by side.
(slope_yxalt, slope_xyalt)

((0.7194841103177934,), 0.7194841103177934)