# 0. Importing Necessary Packages

In [1]:
# Printing the information of Python, IPython, OS, and the generation date.
# This records the environment the notebook was executed in; the
# `version_information` IPython extension must be importable in this kernel.
%load_ext version_information
%version_information

Software,Version
Python,3.7.9 64bit [GCC 7.3.0]
IPython,7.16.1
OS,Linux 5.8.18 100.fc31.x86_64 x86_64 with fedora 31 Thirty_One
Wed Mar 30 10:42:10 2022 KST,Wed Mar 30 10:42:10 2022 KST


In [2]:
# Printing the versions of packages
# Prefer the standard-library importlib.metadata (available from Python 3.8)
# and fall back to the importlib_metadata backport on older interpreters, so
# the cell does not depend on the backport being installed.
try:
    from importlib.metadata import version
except ImportError:
    from importlib_metadata import version

for pkg in ['numpy', 'scipy', 'matplotlib', 'astropy', 'pandas', 'statsmodels']:
    print(pkg+": ver "+version(pkg))

numpy: ver 1.21.0
scipy: ver 1.6.2
matplotlib: ver 3.2.2
astropy: ver 4.2.1
pandas: ver 1.2.4
statsmodels: ver 0.12.2


In [3]:
# matplotlib backend
%matplotlib notebook

# importing necessary modules
import numpy as np
import glob, os
import pandas as pd
from sklearn import linear_model
import statsmodels.api as sm
from matplotlib import pyplot as plt

# 1. Reading the Data

In [4]:
# Load the observation table: r-band magnitude, its error, and the airmass
obs = pd.read_csv("Calibration/Observation.csv")
obs.head(n=10)    # preview of the first ten rows

Unnamed: 0,Star,r_obs,e_r_obs,airmass
0,1,10.484,0.143,1.199
1,2,11.641,0.182,1.199
2,3,9.215,0.074,1.199
3,4,9.047,0.033,1.199
4,5,9.502,0.036,1.199
5,6,10.025,0.344,1.199
6,7,9.079,0.054,1.199
7,8,9.914,0.046,1.199
8,9,13.015,0.239,1.331
9,10,13.206,0.286,1.331


In [5]:
# Load the standard-star table: V-band magnitude plus the colour indices
# B-V, U-B, V-R, R-I, and V-I
lan = pd.read_csv("Calibration/Landolt_catalog.csv")
lan.head(n=10)    # preview of the first ten rows

Unnamed: 0,Star,V,B-V,U-B,V-R,R-I,V-I
0,1,13.004,1.04,0.737,0.607,0.681,1.287
1,2,14.196,1.052,0.38,0.606,0.597,1.203
2,3,11.737,0.987,0.639,0.6,0.674,1.273
3,4,12.33,2.326,2.326,1.373,1.25,2.625
4,5,11.773,0.671,0.506,0.373,0.436,0.808
5,6,14.022,1.248,1.323,0.797,0.683,1.482
6,7,11.312,0.568,0.059,0.335,0.312,0.652
7,8,12.44,1.141,0.83,0.633,0.579,1.206
8,9,14.767,-0.252,-1.091,-0.111,-0.182,-0.296
9,10,15.553,0.83,0.356,0.433,0.389,0.824


In [6]:
# Combine catalogue and observations into one frame, matching rows on "Star"
# (left join: every catalogue row is kept), then append the column
# R = V - (V-R).
df = (
    lan
    .merge(obs, how="left", on="Star")
    .assign(R=lambda tbl: -(tbl['V-R'] - tbl['V']))
)
df.head(n=10)

Unnamed: 0,Star,V,B-V,U-B,V-R,R-I,V-I,r_obs,e_r_obs,airmass,R
0,1,13.004,1.04,0.737,0.607,0.681,1.287,10.484,0.143,1.199,12.397
1,2,14.196,1.052,0.38,0.606,0.597,1.203,11.641,0.182,1.199,13.59
2,3,11.737,0.987,0.639,0.6,0.674,1.273,9.215,0.074,1.199,11.137
3,4,12.33,2.326,2.326,1.373,1.25,2.625,9.047,0.033,1.199,10.957
4,5,11.773,0.671,0.506,0.373,0.436,0.808,9.502,0.036,1.199,11.4
5,6,14.022,1.248,1.323,0.797,0.683,1.482,10.025,0.344,1.199,13.225
6,7,11.312,0.568,0.059,0.335,0.312,0.652,9.079,0.054,1.199,10.977
7,8,12.44,1.141,0.83,0.633,0.579,1.206,9.914,0.046,1.199,11.807
8,9,14.767,-0.252,-1.091,-0.111,-0.182,-0.296,13.015,0.239,1.331,14.878
9,10,15.553,0.83,0.356,0.433,0.389,0.824,13.206,0.286,1.331,15.12


In [7]:
# Defining functions (for convenience)

# Plot - Observed values vs. Fitted values
def plot_comparison(input_data, fitted_data):
    """Scatter the fitted values against the observed values on square axes.

    A dashed red one-to-one line is drawn for reference; a point lying on it
    has a fitted value equal to its observed value.

    Parameters
    ----------
    input_data : array-like providing ``.min()`` and ``.max()``
        Observed values, plotted on the x-axis.
    fitted_data : array-like providing ``.min()`` and ``.max()``
        Fitted values, plotted on the y-axis.
    """
    # Shared limits for both axes, padded by 0.2 on each side.
    min_limit = np.minimum(input_data.min(), fitted_data.min()) - 0.2
    max_limit = np.maximum(input_data.max(), fitted_data.max()) + 0.2

    fig, ax = plt.subplots(figsize=(5,5))
    # Draw the one-to-one line across the plotted range itself (not a fixed
    # interval), so it is always visible whatever the range of the data.
    ax.plot([min_limit, max_limit], [min_limit, max_limit],
            'r--', linewidth=1.5, alpha=0.6)
    ax.plot(input_data, fitted_data, 'o', color='blue', ms=4.0)
    ax.tick_params(axis='both', labelsize=12.0)
    ax.set_xlabel(r"Observed $r-R$", fontsize=12.0)
    ax.set_ylabel(r"Fitted $r-R$", fontsize=12.0)
    ax.set_xlim([min_limit, max_limit])
    ax.set_ylim([min_limit, max_limit])
    plt.tight_layout()

# Plot - Observed values vs. Residuals
def plot_residuals(input_data, residuals, ylim=(-1.5, 1.5)):
    """Scatter the residuals against the observed values and annotate the RMS error.

    Parameters
    ----------
    input_data : array-like providing ``.min()``, ``.max()`` and ``len()``
        Observed values, plotted on the x-axis.
    residuals : array-like
        Residuals of the fit, plotted on the y-axis.
    ylim : pair of float, optional
        Lower and upper limits of the residual axis. The default
        ``(-1.5, 1.5)`` keeps the original fixed range; pass a narrower
        range to inspect small residuals.
    """
    # x-range follows the data, padded by 0.2 on each side.
    min_limit = input_data.min() - 0.2
    max_limit = input_data.max() + 0.2
    # Root-mean-square error: squared residuals summed, divided by the number
    # of observed values.
    RMSE = np.sqrt(np.sum(residuals**2) / len(input_data))

    fig, ax = plt.subplots(figsize=(5,5))
    # Zero-residual reference line; axhline spans the full width of the axes,
    # so it is visible for any range of the data.
    ax.axhline(0.0, color='r', linestyle='--', linewidth=1.5, alpha=0.6)
    ax.plot(input_data, residuals, 'o', color='blue', ms=4.0)
    ax.tick_params(axis='both', labelsize=12.0)
    ax.set_xlabel(r"Observed $r-R$", fontsize=12.0)
    ax.set_ylabel("Residuals", fontsize=12.0)
    ax.set_xlim([min_limit, max_limit])
    ax.set_ylim(ylim)
    ax.text(0.05, 0.95, f"RMS Error = {RMSE:.2f}", fontsize=13.0, fontweight='bold',
            transform=ax.transAxes, ha='left', va='top')
    plt.tight_layout()

# Printing the summary of model
def summary_model(x, y, e_y):
    """Fit a weighted least-squares model of ``y`` on ``x`` and print its summary.

    A constant column is added to ``x`` with ``sm.add_constant`` so that the
    model includes an intercept, and every point is weighted by ``1/e_y**2``.
    Nothing is returned; the statsmodels summary table is printed.
    """
    design = sm.add_constant(x)
    wls = sm.WLS(y.astype('float'), design.astype('float'), weights=1/e_y**2)
    print(wls.fit().summary())

# 2. Linear Regression

### 1) Multiple linear regression with all the data

$r-R = Zero(R) + k(R) \times airmass + c(R) \times (V-R)$

**We have to estimate the three parameters: $Zero(R)$, $k(R)$, and $c(R)$.**

* $Zero(R)$: Zeropoint (different from 25.0!)
* $k(R)$: Extinction coefficient
* $c(R)$: Color coefficient

In [8]:
# Regression inputs for the full sample:
#   X   - predictors (airmass and the V-R colour index)
#   Y   - response, observed r minus catalogue R
#   e_Y - error of the observed r magnitude
X = df[['airmass', 'V-R']]
Y = df['r_obs'].sub(df['R'])
e_Y = df['e_r_obs']

In [9]:
# Running the multiple linear regression
# Weighted fit: each star is weighted by the inverse variance of its magnitude
# error. An unweighted regr.fit(X, Y) is deliberately not run first, because
# fit() replaces any earlier solution and its result would be discarded.
regr = linear_model.LinearRegression()
regr.fit(X, Y, sample_weight=1/e_Y**2.)    # With considering magnitude error
print(f"Zeropoint: Zero(R) = {regr.intercept_:.3f}")
print("\nCoeffients")
print(f"Extinction coefficient: k(R) = {regr.coef_[0]:.3f}")
print(f"Color coefficient: c(V-R) = {regr.coef_[1]:.3f}")
print("\n")
summary_model(X, Y, e_Y)
fitted_Y = regr.predict(X)
resi = Y - fitted_Y    # residuals: observed minus fitted

Zeropoint: Zero(R) = -2.159

Coeffients
Extinction coefficient: k(R) = 0.215
Color coefficient: c(V-R) = -0.009


                            WLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.052
Model:                            WLS   Adj. R-squared:                 -0.094
Method:                 Least Squares   F-statistic:                    0.3566
Date:                Wed, 30 Mar 2022   Prob (F-statistic):              0.707
Time:                        10:42:12   Log-Likelihood:                 14.482
No. Observations:                  16   AIC:                            -22.96
Df Residuals:                      13   BIC:                            -20.65
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------

  "anyway, n=%i" % int(n))


* **Fitting results**

    * $r-R = (-2.159 \pm 0.397) + (0.215 \pm 0.319) \times airmass + (-0.009 \pm 0.036) \times (V-R)$

### **How reliable are these results?**

In [10]:
# Comparison plot (observed Y vs. fitted Y)
plot_comparison(input_data=Y, fitted_data=fitted_Y)

<IPython.core.display.Javascript object>

In [11]:
# Residual plot (observed Y vs. residuals)
plot_residuals(input_data=Y, residuals=resi)

<IPython.core.display.Javascript object>

In [12]:
# Printing residuals (observed minus fitted r-R, one value per row of df)
# so that individual outliers can be identified by their index.
resi

0    -0.007138
1    -0.043147
2    -0.016199
3     0.002519
4     0.005828
5    -1.292487
6     0.005498
7     0.013088
8     0.008225
9    -0.038048
10    0.025353
11   -0.005350
12   -0.063622
13    0.006778
14    0.000124
15   -0.012245
dtype: float64

**Star 6 (index 5) is a clear outlier (residual of about $-1.29$ mag), so we should remove it for better results!**

### 2) Multiple linear regression with clipped data

In [13]:
# Exclude the row labelled 5 (Star 6) from the sample
df2 = df.drop(labels=5, axis=0)
df2

Unnamed: 0,Star,V,B-V,U-B,V-R,R-I,V-I,r_obs,e_r_obs,airmass,R
0,1,13.004,1.04,0.737,0.607,0.681,1.287,10.484,0.143,1.199,12.397
1,2,14.196,1.052,0.38,0.606,0.597,1.203,11.641,0.182,1.199,13.59
2,3,11.737,0.987,0.639,0.6,0.674,1.273,9.215,0.074,1.199,11.137
3,4,12.33,2.326,2.326,1.373,1.25,2.625,9.047,0.033,1.199,10.957
4,5,11.773,0.671,0.506,0.373,0.436,0.808,9.502,0.036,1.199,11.4
6,7,11.312,0.568,0.059,0.335,0.312,0.652,9.079,0.054,1.199,10.977
7,8,12.44,1.141,0.83,0.633,0.579,1.206,9.914,0.046,1.199,11.807
8,9,14.767,-0.252,-1.091,-0.111,-0.182,-0.296,13.015,0.239,1.331,14.878
9,10,15.553,0.83,0.356,0.433,0.389,0.824,13.206,0.286,1.331,15.12
10,11,11.989,0.593,0.005,0.364,0.344,0.711,9.775,0.065,1.331,11.625


In [14]:
# Regression inputs for the clipped sample (df2):
#   X   - predictors (airmass and the V-R colour index)
#   Y   - response, observed r minus catalogue R
#   e_Y - error of the observed r magnitude
X = df2[['airmass', 'V-R']]
Y = df2['r_obs'].sub(df2['R'])
e_Y = df2['e_r_obs']

In [15]:
# Weighted multiple linear regression on the clipped sample; each point is
# weighted by the inverse variance of its magnitude error.
regr = linear_model.LinearRegression()
regr.fit(X, Y, sample_weight=1/e_Y**2.)
print(f"Zeropoint: Zero(R) = {regr.intercept_:.3f}")
print("\nCoeffients")
print(f"Extinction coefficient: k(R) = {regr.coef_[0]:.3f}")
print(f"Color term: c(V-R) = {regr.coef_[1]:.3f}")
print("\n")
summary_model(X, Y, e_Y)
# Model predictions and the residuals (observed minus fitted)
fitted_Y = regr.predict(X)
resi = Y - fitted_Y

Zeropoint: Zero(R) = -2.141

Coeffients
Extinction coefficient: k(R) = 0.202
Color term: c(V-R) = -0.007


                            WLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.580
Model:                            WLS   Adj. R-squared:                  0.510
Method:                 Least Squares   F-statistic:                     8.275
Date:                Wed, 30 Mar 2022   Prob (F-statistic):            0.00551
Time:                        10:42:12   Log-Likelihood:                 39.961
No. Observations:                  15   AIC:                            -73.92
Df Residuals:                      12   BIC:                            -71.80
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------

  "anyway, n=%i" % int(n))


* **Fitting results**

    * $r-R = (-2.141 \pm 0.075) + (0.202 \pm 0.060) \times airmass + (-0.007 \pm 0.007) \times (V-R)$

In [16]:
# Comparison plot for the clipped sample (observed Y vs. fitted Y)
plot_comparison(input_data=Y, fitted_data=fitted_Y)

<IPython.core.display.Javascript object>

In [17]:
# Residual plot for the clipped sample (observed Y vs. residuals)
plot_residuals(input_data=Y, residuals=resi)

<IPython.core.display.Javascript object>