### Importing the required packages 

In [None]:
import zipfile
import os
from astropy.io import fits
from io import BytesIO
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

### Storing the file path 

In [None]:
# Path to the zip archive that holds the FITS files. The default is the
# original author's local Windows path; set the FITS_ZIP_PATH environment
# variable to use an archive stored elsewhere without editing this cell.
zip_path = os.environ.get("FITS_ZIP_PATH", r"C:\Users\KHUSHI\Desktop\Work\fits.zip")

In [None]:
# Accumulators filled while the FITS files are scanned:
#   results   - one dict of header values per file
#   fwhm_list - FWHM values that come with a usable noise estimate
#   weights   - 1 / noise**2 for each entry of fwhm_list
#   mask_set  - distinct CCF mask names encountered
results, fwhm_list, weights = [], [], []
mask_set = set()

def compute_vsini(FWHM, MASK):
    """Convert a CCF FWHM into vsini with a per-mask linear calibration.

    Parameters
    ----------
    FWHM : float or None
        Full width at half maximum of the CCF (the notebook prints it in km/s).
    MASK : str or None
        Name of the CCF mask; only 'G2' and 'K5' have a calibration here.

    Returns
    -------
    float or None
        ``slope * FWHM - offset`` for the given mask, or ``None`` when the
        mask has no calibration or ``FWHM`` is missing.
    """
    # (slope, offset) of the linear relation vsini = slope * FWHM - offset.
    calibrations = {
        'G2': (1.09446, 5.45380),
        'K5': (1.26952, 6.06771),
    }
    # A header without the FWHM card yields None; multiplying it would raise
    # TypeError, so report "no value" exactly as for an unknown mask.
    if FWHM is None or MASK not in calibrations:
        return None
    slope, offset = calibrations[MASK]
    return (slope * FWHM) - offset

     
# Read every FITS file under fits/HD_82943/ straight from the zip archive and
# collect the CCF header values needed by the cells below.
with zipfile.ZipFile(zip_path, 'r') as z:
    fits_files = [f for f in z.namelist() if f.startswith('fits/HD_82943/') and f.endswith('.fits')]
    print(f"Found {len(fits_files)} FITS files")

    for fits_filename in fits_files:
        print(f"\nReading: {fits_filename}")
        with z.open(fits_filename) as f:
            # Buffer the archive member in memory before handing it to astropy.
            with fits.open(BytesIO(f.read())) as hdul:
                header = hdul[0].header
                # header.get() returns None for a missing card, so each value
                # below is checked before it is used in arithmetic.
                rv = header.get('HIERARCH ESO DRS CCF RV')
                # The keyword previously read 'CCF reCONTRAST'; the stray
                # 're' looks like a typo that made this lookup fall back to
                # None for every file. NOTE(review): confirm against a header.
                contrast = header.get('HIERARCH ESO DRS CCF CONTRAST')
                date_obs = header.get('DATE-OBS')
                FWHM = header.get('HIERARCH ESO DRS CCF FWHM')
                CCF_NOISE = header.get('HIERARCH ESO DRS CCF NOISE')
                MASK = header.get('HIERARCH ESO DRS CCF MASK')

                has_noise = CCF_NOISE is not None
                # sigma_FWHM = 2.35 * noise; stays None when the noise card is
                # absent instead of raising TypeError on None.
                sigma_fwhm = 2.35 * CCF_NOISE if has_noise else None

                # Only measurements with a strictly positive noise receive an
                # inverse-variance weight (this also avoids dividing by zero).
                if FWHM is not None and has_noise and CCF_NOISE > 0:
                    fwhm_list.append(FWHM)
                    weights.append(1 / (CCF_NOISE ** 2))

                vsini = None
                if MASK:
                    mask_set.add(MASK)
                    # Skip the conversion when the FWHM card is missing.
                    if FWHM is not None:
                        vsini = compute_vsini(FWHM, MASK)

                results.append({
                    "filename": fits_filename,
                    "date_obs": date_obs,
                    "rv": rv,
                    "contrast": contrast,
                    "FWHM": FWHM,
                    "CCF_NOISE": CCF_NOISE,
                    "MASK": MASK,
                    "sigma_fwhm": sigma_fwhm,
                    "vsini": vsini,
                })

# Tabulate the per-file header values and keep a CSV copy on disk.
df = pd.DataFrame(results)
print(df)
df.to_csv("rv_contrast_data.csv", index=False)

# Convert to numpy arrays
fwhm_arr = np.array(fwhm_list)
weights_arr = np.array(weights)

# Without at least one usable measurement the cumulative-weight lookup below
# would fail with an unhelpful IndexError, so stop with a clear message.
if fwhm_arr.size == 0:
    raise ValueError(
        "No FITS file provided both a FWHM and a positive CCF noise; "
        "cannot compute the weighted median FWHM."
    )

# Sort FWHM and carry the weights along in the same order.
sorted_indices = np.argsort(fwhm_arr)
fwhm_sorted = fwhm_arr[sorted_indices]
weights_sorted = weights_arr[sorted_indices]

# Weighted median: first sorted FWHM whose cumulative weight reaches half of
# the total weight.
cumulative_weights = np.cumsum(weights_sorted)
total_weight = cumulative_weights[-1]
median_idx = np.searchsorted(cumulative_weights, total_weight / 2.0)
weighted_median = fwhm_sorted[median_idx]

print(f"\nWeighted Median FWHM = {weighted_median:.4f} km/s")

# The G2 calibration is applied unconditionally below; make it visible when
# the files were actually reduced with a different mask.
if mask_set - {'G2'}:
    print(f"Warning: CCF mask(s) found in the files: {sorted(mask_set)}; the G2 calibration is used regardless.")

# Compute vsini using weighted median FWHM
vsini_final = compute_vsini(weighted_median, 'G2')

print(f"Final vsin(i) without error using weighted median FWHM = {vsini_final:.4f} km/s")



### Weighted median for FWHM values

In [None]:
# Repeat the weighted median FWHM as the headline figure of this section.
final_fwhm_line = f"\nFinal Weighted Median FWHM = {weighted_median:.4f} km/s"
print(final_fwhm_line)

### Weighted median with error for FWHM values

In [None]:
# Sample standard deviation (ddof=1) of the individual FWHM values, quoted
# next to the weighted median as its uncertainty.
fwhm_std = np.std(fwhm_arr, ddof=1)
fwhm_summary = f"Weighted Median FWHM = {weighted_median:.4f} ± {fwhm_std:.4f} km/s"
print(fwhm_summary)

### vsini value with error for G2 Mask 

In [None]:
# Error on vsini for the G2 relation, of the form
#   sqrt(FWHM^2 * a^2 + b^2 + 2 * FWHM * a * b * rho).
# NOTE(review): a, b and rho are presumably the slope error, offset error and
# their correlation coefficient from the calibration fit - confirm the source.
g2_a = 0.21854
g2_b = 2.00007
g2_rho = -0.96604
error_vsini1 = np.sqrt(
    ((weighted_median) ** 2) * ((g2_a) ** 2)
    + ((g2_b) ** 2)
    + 2 * (weighted_median) * (g2_a) * (g2_b) * (g2_rho)
)
print(f"Error for vsini is = {error_vsini1:.4f} km/s")
print(f"Final vsini with error is = {vsini_final:.4f} ± {error_vsini1:.4f} km/s")

### vsini value with error for K5 Mask 

In [None]:
# Error on vsini for the K5 relation, same functional form as the G2 cell:
#   sqrt(FWHM^2 * a^2 + b^2 + 2 * FWHM * a * b * rho).
# NOTE(review): a, b and rho are presumably the slope error, offset error and
# their correlation coefficient from the calibration fit - confirm the source.
error_vsini2 = np.sqrt(((weighted_median)**2)*((0.19402)**2)+((1.62830)**2)+2*(weighted_median)*(0.19402)*(1.62830)*(-0.96250))

# vsini_final was computed with the G2 calibration; pairing it with the K5
# error mixed two relations, so evaluate the K5 calibration for this report.
vsini_k5 = compute_vsini(weighted_median, 'K5')
print(f"Error for vsini is = {error_vsini2:.4f} km/s")
print(f"Final vsini with error is = {vsini_k5:.4f} ± {error_vsini2:.4f} km/s")

### Calculating total error 

In [None]:
# Combine the calibration error with the FWHM scatter scaled by the slope.
# The slope 1.09446 is the G2 one, so the matching calibration error is
# error_vsini1 (the original referenced an undefined name, error_vsini).
total_error = np.sqrt((error_vsini1)**2 + ((1.09446)**2)*((fwhm_std)**2))
print(total_error)

### Now we will read the given CSV file and make the required plots 

In [None]:
# Load the per-star summary table; from here on `df` refers to this CSV
# rather than to the FITS header table built earlier.
csv_path = "Vsini_Values _again.csv"
df = pd.read_csv(csv_path)
df.head()

In [None]:
# List the column names of the loaded table; the cells below select columns
# by these exact strings.
print(df.columns)

### Cleaning the dataframe 

In [None]:
# Build df_clean: a copy of df without any row that has a missing value in
# one of the columns listed below (only NaNs are removed; zero or negative
# values are kept).
required_columns = [
    'Star Name', 'weighted median (FWHM), Km/s',
    'uncertainity (FWHM), Km/s', 'relative error (FWHM)',
    'calculated vsini, Km/s', 'error (vsini), Km/s',
    'relative error (vsini)', 'mask', 'Lit. values of vsini',
    'stellar radius, R (R_sun)', 'error (R)',
    'stellar rotational period, P_rot', 'error (P_rot)', 'sini',
    'error (sini)', 'relative error(sini)', 'exoplanets',
    'Msini (M_Jupiter)', 'error (Msini)', 'Mass, M', 'error (M)',
    'relative error (M)',
]
df_clean = df.copy().dropna(subset=required_columns)

In [None]:
# Show the columns of the cleaned table (dropna removes rows, not columns).
print(df_clean.columns)

In [None]:
# Preview the first rows of the cleaned table; left as a bare expression so
# the notebook renders it.
df_clean.head()

In [None]:
import matplotlib.pyplot as plt

# Histogram of the weighted median FWHM taken from the raw table (df, not
# df_clean), using 25 bins.
raw_fwhm = df['weighted median (FWHM), Km/s']
plt.hist(raw_fwhm, bins=25, color='skyblue', edgecolor='black')
plt.title('Distribution of Weighted Median FWHM')
plt.xlabel('Median FWHM (km/s)')
plt.ylabel('Frequency')

### Vsini values vs. Weighted median 

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# The two df_clean columns compared in this cell.
fwhm_series = df_clean['weighted median (FWHM), Km/s']
vsini_series = df_clean['calculated vsini, Km/s']

# Data points.
plt.scatter(fwhm_series, vsini_series, alpha=0.7, label='Data')

# Degree-1 least-squares fit, evaluated on 100 points spanning the FWHM range.
coeffs = np.polyfit(fwhm_series, vsini_series, 1)
x_vals = np.linspace(fwhm_series.min(), fwhm_series.max(), 100)
y_vals = np.polyval(coeffs, x_vals)
plt.plot(x_vals, y_vals, color='red', linestyle='--', label='Linear Fit')

# Titles, legend and grid.
plt.title('Median FWHM vs. vsini')
plt.xlabel('Median FWHM (km/s)')
plt.ylabel('vsini (km/s)')
plt.legend()
plt.grid(True)
plt.show()


### Vsini values vs. Weighted median with error bars 

In [None]:
import numpy as np
import matplotlib.pyplot as plt


plt.figure(figsize=(10, 6))  # Set figure size BEFORE plotting

x = df_clean['weighted median (FWHM), Km/s']
y = df_clean['calculated vsini, Km/s']
x_err = df_clean['uncertainity (FWHM), Km/s']
# The vsini error column is named 'error (vsini), Km/s'. The previous lookup
# of 'uncertainity (vsini), Km/s' via .get() returned None, which silently
# removed the vertical error bars. Direct indexing fails loudly on a bad name.
y_err = df_clean['error (vsini), Km/s']
names = df_clean['Star Name']

plt.errorbar(x, y, xerr=x_err, yerr=y_err, fmt='o', color ='hotpink', alpha=0.7, capsize=3, ecolor='pink', label='Data')

# Keep only the points inside the displayed x range (5.5 to 8.5).
mask = (x >= 5.5) & (x <= 8.5)
x_zoom = x[mask]
y_zoom = y[mask]
names_zoom = names[mask]

# Label every star that falls inside the zoomed range.
for x_val, y_val, star in zip(x_zoom, y_zoom, names_zoom):
    plt.text(x_val + 0.1, y_val + 0.1, star, fontsize=8, color='black', ha='right', va='bottom', alpha=0.9)


# Linear fit on the zoomed data only, drawn across the displayed range.
coeffs = np.polyfit(x_zoom, y_zoom, 1)
x_vals = np.linspace(5.5, 8.5, 100)
y_vals = np.polyval(coeffs, x_vals)
plt.plot(x_vals, y_vals, color='red', linestyle='--', label='Linear Fit')

plt.xlabel('Median FWHM (km/s)')
plt.ylabel('vsini (km/s)')
plt.title('Median FWHM vs. vsini ')
plt.xlim(5.5, 8.5)
plt.legend()
plt.grid(True)
plt.show()


In [None]:
# Bar chart with one bar per star showing its weighted median FWHM.
plt.figure(figsize=(8, 4))
sns.barplot(data=df_clean, x='Star Name', y='weighted median (FWHM), Km/s', color='skyblue')

plt.title('Weighted Median FWHM for individual stars')
plt.xlabel('Star Name')
plt.ylabel('Weighted Median FWHM (km/s)')
plt.xticks(rotation=45, ha='right', fontsize=9)

# Fixed y range from 0 to 14, horizontal grid lines only.
plt.ylim(0, 14)
plt.tight_layout()
plt.grid(axis='y')
plt.show()


In [None]:
# Strip leading/trailing whitespace from the column names of df (in place).
df.columns = df.columns.str.strip()

# Rebuild df_clean from df, this time requiring only the FWHM uncertainty and
# the star name. NOTE(review): this replaces the stricter df_clean built
# earlier, so cells executed after this one see the looser selection.
uncertainty_col = 'uncertainity (FWHM), Km/s'
df_clean = df.dropna(subset=[uncertainty_col, 'Star Name'])

# Largest uncertainty first, purely for the look of the chart.
df_clean_sorted = df_clean.sort_values(by=uncertainty_col, ascending=False)

# One bar per star.
plt.figure(figsize=(12, 5))
bar_style = dict(color='lightcoral', edgecolor='black')
sns.barplot(x='Star Name', y=uncertainty_col, data=df_clean_sorted, **bar_style)

# Axis text, fixed 0-1 y range, dashed horizontal grid.
plt.title('FWHM error values for Individual Stars')
plt.xlabel('Star Name')
plt.ylabel('Uncertainty in FWHM (km/s)')
plt.xticks(rotation=45, ha='right', fontsize=9)
plt.ylim(0, 1)
plt.grid(axis='y', linestyle='--', alpha=0.6)
plt.tight_layout()
plt.show()


In [None]:
# Weighted median FWHM of every star currently in df_clean.
weighted_medians = df_clean['weighted median (FWHM), Km/s']

# 20-bin histogram with a KDE curve overlaid.
plt.figure(figsize=(8, 5))
sns.histplot(weighted_medians, bins=20, kde=True, color='teal')

plt.title("Histogram of Weighted Median FWHM for All Stars")
plt.xlabel("Weighted Median FWHM (km/s)")
plt.ylabel("Number of Stars")
plt.grid(True)
plt.tight_layout()
plt.show()


### True Mass vs. Msini (full view)

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# df itself is never cleaned, so work on a numeric, NaN-free copy of the
# columns used here: a NaN or a text cell would otherwise break np.polyfit.
msini_col = 'Msini (M_Jupiter)'
mass_col = 'Mass, M'
mass_df = df[['Star Name', msini_col, mass_col]].copy()
for col in (msini_col, mass_col):
    mass_df[col] = pd.to_numeric(mass_df[col], errors='coerce')
mass_df = mass_df.dropna(subset=[msini_col, mass_col])

# Extract Msini, M, and star names
x = mass_df[msini_col]
y = mass_df[mass_col]
names = mass_df['Star Name']

# Degree-1 least-squares fit, evaluated on 100 points over the Msini range.
coeffs = np.polyfit(x, y, 1)
x_fit = np.linspace(x.min(), x.max(), 100)
y_fit = np.polyval(coeffs, x_fit)

# Plot
plt.figure(figsize=(8, 6))
plt.scatter(x, y, color='mediumvioletred', alpha=0.7, label='Valid Data Points')
plt.plot(x_fit, y_fit, 'r--', label=f'Linear Fit: y = {coeffs[0]:.2f}x + {coeffs[1]:.2f}')

# Add star name labels with a small offset
for x_val, y_val, star in zip(x, y, names):
    plt.text(x_val + 0.02, y_val + 0.02, star, fontsize=8, alpha=0.7)

plt.xlabel('Msini (M_Jupiter)')
plt.ylabel('Mass, M')
plt.title('True Mass vs. Msini')
plt.grid(True)
plt.legend()
plt.tight_layout()
plt.show()


### True Mass vs. Msini (0 to 0.2 range)

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Keep the rows whose Msini and Mass both lie in [0, 0.2].
msini_all = df['Msini (M_Jupiter)']
mass_all = df['Mass, M']
in_window = (msini_all >= 0) & (msini_all <= 0.2) & (mass_all >= 0) & (mass_all <= 0.2)
df_zoom = df[in_window]

x = df_zoom['Msini (M_Jupiter)']
y = df_zoom['Mass, M']
names = df_zoom['Star Name']

if df_zoom.empty:
    print("No data points in the range 0 to 0.2 for both Msini and Mass, M.")
else:
    # Degree-1 fit on the selected rows, drawn across the whole window.
    coeffs = np.polyfit(x, y, 1)
    x_fit = np.linspace(0, 0.2, 100)
    y_fit = np.polyval(coeffs, x_fit)
    fit_label = f'Linear Fit: y = {coeffs[0]:.2f}x + {coeffs[1]:.2f}'

    plt.figure(figsize=(8, 6))
    plt.scatter(x, y, color='mediumvioletred', alpha=0.7, label=' Data Points')
    plt.plot(x_fit, y_fit, 'r--', label=fit_label)

    # Star names, nudged slightly away from their markers.
    for x_val, y_val, star in zip(x, y, names):
        plt.text(x_val + 0.005, y_val + 0.005, star, fontsize=8, alpha=0.7)

    plt.title('True Mass vs. Msini (0 to 0.2 Range)')
    plt.xlabel('Msini (M_Jupiter)')
    plt.ylabel('Mass, M')
    plt.xlim(0, 0.2)
    plt.ylim(0, 0.2)
    plt.grid(True)
    plt.legend()
    plt.tight_layout()
    plt.show()


### True Mass vs. Msini with error bars

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Coerce the four columns used here to numbers, in place on df; anything
# that cannot be parsed becomes NaN.
for numeric_col in ('Msini (M_Jupiter)', 'Mass, M', 'error (Msini)', 'error (M)'):
    df[numeric_col] = pd.to_numeric(df[numeric_col], errors='coerce')

# Rows with Msini and Mass both inside [0, 0.2] and both errors present.
msini_in_range = df['Msini (M_Jupiter)'].between(0, 0.2)
mass_in_range = df['Mass, M'].between(0, 0.2)
errors_present = df[['error (Msini)', 'error (M)']].notna().all(axis=1)
df_zoom = df[msini_in_range & mass_in_range & errors_present]

# Values and their error bars.
x = df_zoom['Msini (M_Jupiter)']
y = df_zoom['Mass, M']
x_err = df_zoom['error (Msini)']
y_err = df_zoom['error (M)']

if df_zoom.empty:
    print("No data points in the range 0 to 0.2 for both Msini and Mass, M.")
else:
    # Degree-1 fit on the selected rows, drawn across the whole window.
    coeffs = np.polyfit(x, y, 1)
    x_fit = np.linspace(0, 0.2, 100)
    y_fit = np.polyval(coeffs, x_fit)
    fit_label = f'Linear Fit: y = {coeffs[0]:.2f}x + {coeffs[1]:.2f}'

    plt.figure(figsize=(8, 6))
    plt.errorbar(x, y, xerr=x_err, yerr=y_err, fmt='o', color='mediumvioletred',
                 ecolor='gray', elinewidth=1, capsize=3, label='Data with Errors')
    plt.plot(x_fit, y_fit, 'r--', label=fit_label)

    plt.title('True Mass vs. Msini')
    plt.xlabel('Msini (M_Jupiter)')
    plt.ylabel('Mass, M')
    plt.xlim(0, 0.2)
    plt.ylim(0, 0.2)
    plt.grid(True)
    plt.legend()
    plt.tight_layout()
    plt.show()


### Calculated vsini vs Weighted Median FWHM (without zoom)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# One row per star is read from df; rows lacking either plotted quantity
# are dropped.
fwhm_col = 'weighted median (FWHM), Km/s'
vsini_col = 'calculated vsini, Km/s'
df_plot = df.dropna(subset=[fwhm_col, vsini_col])

# Error bars come from the matching uncertainty columns; .get() returns
# None (no bars) if a column is absent.
fwhm_unc = df_plot.get('uncertainity (FWHM), Km/s', None)
vsini_unc = df_plot.get('error (vsini), Km/s', None)

plt.figure(figsize=(10, 6))
plt.errorbar(df_plot[fwhm_col], df_plot[vsini_col],
             xerr=fwhm_unc,
             yerr=vsini_unc,
             fmt='o', ecolor='gray', capsize=3, color='mediumvioletred',
             label='Stars')

# Star names, nudged slightly away from their markers.
for fwhm_val, vsini_val, star in zip(df_plot[fwhm_col], df_plot[vsini_col], df_plot['Star Name']):
    plt.text(fwhm_val + 0.05, vsini_val + 0.05, star, fontsize=8, alpha=0.7)

plt.title('v sin i vs Weighted Median FWHM')
plt.xlabel('Weighted Median FWHM (km/s)')
plt.ylabel('Calculated v sin i (km/s)')
plt.grid(True)
plt.tight_layout()
plt.legend()
plt.show()


### Literature vsini vs Weighted Median FWHM

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import pearsonr

# Rows must have all four columns below, although only the first three are
# plotted (the literature values are drawn without vertical error bars).
fwhm_col = 'weighted median (FWHM), Km/s'
lit_col = 'Lit. values of vsini'
df_plot = df.dropna(subset=[
    fwhm_col,
    lit_col,
    'uncertainity (FWHM), Km/s',
    'error (vsini), Km/s',
])

# Plotted quantities.
x = df_plot[fwhm_col]
y = df_plot[lit_col]
xerr = df_plot['uncertainity (FWHM), Km/s']
yerr = None

# Degree-1 fit on all selected rows, drawn over the displayed 5.5-8.5 range.
coeffs = np.polyfit(x, y, 1)
slope, intercept = coeffs
x_fit = np.linspace(5.5, 8.5, 100)
y_fit = np.polyval(coeffs, x_fit)
equation = f"y = {slope:.2f}x + {intercept:.2f}"

# Correlation statistics on the same rows.
r_value, p_value = pearsonr(x, y)
r_squared = r_value ** 2
N = len(x)

plt.figure(figsize=(8, 6))
plt.errorbar(x, y, xerr=xerr, yerr=None, fmt='o',
             ecolor='gray', capsize=3, color='black',
             label='Stars with errors')

# Star names next to their markers, when the column is available.
if 'Star Name' in df_plot.columns:
    for fwhm_val, lit_val, star in zip(x, y, df_plot['Star Name']):
        plt.text(fwhm_val + 0.03,
                 lit_val + 0.03,
                 star, fontsize=8, alpha=0.6)

# Fitted line.
plt.plot(x_fit, y_fit, 'gray', linestyle='--',
         label=f'Linear fit: {equation}')

plt.title('v sin i vs Weighted Median FWHM')
plt.xlabel('Weighted Median FWHM (km/s)')
plt.ylabel('Literature v sin i (km/s)')
plt.xlim(5.5, 8.5)
plt.grid(True)
plt.legend()

# Statistics box in the upper-left corner of the axes.
stats_text = "\n".join([
    f"N = {N}",
    f"Pearson r = {r_value:.2f}",
    f"p-value = {p_value:.2g}",
    f"R² = {r_squared:.2f}",
    equation,
])
box_style = dict(boxstyle="round,pad=0.4", facecolor='lavender', edgecolor='gray')
plt.annotate(stats_text,
             xy=(0.05, 0.95), xycoords='axes fraction',
             fontsize=11, verticalalignment='top',
             bbox=box_style)

plt.tight_layout()
plt.show()


### Calculated vsini vs Weighted Median FWHM with error bars

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import pearsonr

# Rows must have both plotted quantities and both uncertainties.
fwhm_col = 'weighted median (FWHM), Km/s'
vsini_col = 'calculated vsini, Km/s'
fwhm_unc_col = 'uncertainity (FWHM), Km/s'
vsini_unc_col = 'error (vsini), Km/s'
df_plot = df.dropna(subset=[fwhm_col, vsini_col, fwhm_unc_col, vsini_unc_col])

# Plotted quantities and their error bars.
x = df_plot[fwhm_col]
y = df_plot[vsini_col]
xerr = df_plot[fwhm_unc_col]
yerr = df_plot[vsini_unc_col]

# Degree-1 fit on all selected rows, drawn over the displayed 5.5-8.5 range.
coeffs = np.polyfit(x, y, 1)
slope, intercept = coeffs
x_fit = np.linspace(5.5, 8.5, 100)
y_fit = np.polyval(coeffs, x_fit)
equation = f"y = {slope:.2f}x + {intercept:.2f}"

# Correlation statistics on the same rows.
r_value, p_value = pearsonr(x, y)
r_squared = r_value ** 2
N = len(x)

plt.figure(figsize=(8, 6))
plt.errorbar(x, y, xerr=xerr, yerr=yerr, fmt='o',
             ecolor='gray', capsize=3, color='black',
             label='Stars with errors')

# Star names next to their markers, when the column is available.
if 'Star Name' in df_plot.columns:
    for fwhm_val, vsini_val, star in zip(x, y, df_plot['Star Name']):
        plt.text(fwhm_val + 0.03,
                 vsini_val + 0.03,
                 star, fontsize=8, alpha=0.6)

# Fitted line.
plt.plot(x_fit, y_fit, 'gray', linestyle='--',
         label=f'Linear fit: {equation}')

plt.title('v sin i vs Weighted Median FWHM')
plt.xlabel('Weighted Median FWHM (km/s)')
plt.ylabel('Calculated v sin i (km/s)')
plt.xlim(5.5, 8.5)
plt.grid(True)
plt.legend()

# Statistics box in the upper-left corner of the axes.
stats_text = "\n".join([
    f"N = {N}",
    f"Pearson r = {r_value:.2f}",
    f"p-value = {p_value:.2g}",
    f"R² = {r_squared:.2f}",
    equation,
])
box_style = dict(boxstyle="round,pad=0.4", facecolor='lavender', edgecolor='gray')
plt.annotate(stats_text,
             xy=(0.05, 0.95), xycoords='axes fraction',
             fontsize=11, verticalalignment='top',
             bbox=box_style)

plt.tight_layout()
plt.show()


### Residuals vs Calculated vsini with Linear Fit

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Rows need both vsini values and a star name (used for the labels).
calc_col = 'calculated vsini, Km/s'
lit_col = 'Lit. values of vsini'
df_res = df.dropna(subset=[calc_col, lit_col, 'Star Name'])

# Residual = literature value minus calculated value.
y_calc = df_res[calc_col]
y_lit = df_res[lit_col]
residuals = y_lit - y_calc
names = df_res['Star Name']

# Degree-1 fit of the residuals against the calculated vsini.
coeffs = np.polyfit(y_calc, residuals, 1)
x_fit = np.linspace(y_calc.min(), y_calc.max(), 100)
y_fit = np.polyval(coeffs, x_fit)
fit_label = f'Fit: y = {coeffs[0]:.2f}x + {coeffs[1]:.2f}'

plt.figure(figsize=(10, 6))
plt.scatter(y_calc, residuals, color='purple', alpha=0.7, label='Residuals')
plt.plot(x_fit, y_fit, 'r--', label=fit_label)

# Reference line at zero residual.
plt.axhline(0, color='gray', linestyle='--')

# Star names, nudged slightly away from their markers.
for calc_val, res_val, star in zip(y_calc, residuals, names):
    plt.text(calc_val + 0.05, res_val + 0.05, star, fontsize=8, alpha=0.7)

plt.title('Residuals vs Calculated vsini with Linear Fit')
plt.xlabel('Calculated vsini (km/s)')
plt.ylabel('Residuals (Lit. vsini - Calculated vsini) (km/s)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()


### Residuals vs Literature vsini with Linear Fit

In [None]:

import matplotlib.pyplot as plt
import numpy as np

# Rows need both vsini values and a star name (used for the labels).
calc_col = 'calculated vsini, Km/s'
lit_col = 'Lit. values of vsini'
df_res = df.dropna(subset=[calc_col, lit_col, 'Star Name'])

# Residual = literature value minus calculated value.
y_calc = df_res[calc_col]
y_lit = df_res[lit_col]
residuals = y_lit - y_calc
names = df_res['Star Name']

# Degree-1 fit of the residuals against the literature vsini.
coeffs = np.polyfit(y_lit, residuals, 1)
x_fit = np.linspace(y_lit.min(), y_lit.max(), 100)
y_fit = np.polyval(coeffs, x_fit)
fit_label = f'Fit: y = {coeffs[0]:.2f}x + {coeffs[1]:.2f}'

plt.figure(figsize=(10, 6))
plt.scatter(y_lit, residuals, color='purple', alpha=0.7, label='Residuals')
plt.plot(x_fit, y_fit, 'r--', label=fit_label)

# Reference line at zero residual.
plt.axhline(0, color='gray', linestyle='--')

# Star names, nudged slightly away from their markers.
for lit_val, res_val, star in zip(y_lit, residuals, names):
    plt.text(lit_val + 0.05, res_val + 0.05, star, fontsize=8, alpha=0.7)

plt.title('Residuals vs Literature vsini with Linear Fit')
plt.xlabel('Literature vsini (km/s)')
plt.ylabel('Residuals (Lit. vsini - Calculated vsini) (km/s)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()


### Residuals vs Literature vsini with Linear Fit (residual axis limited to ±1 km/s, no star labels)

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Rows need both vsini values and a star name.
calc_col = 'calculated vsini, Km/s'
lit_col = 'Lit. values of vsini'
df_res = df.dropna(subset=[calc_col, lit_col, 'Star Name'])

# Residual = literature value minus calculated value.
y_calc = df_res[calc_col]
y_lit = df_res[lit_col]
residuals = y_lit - y_calc

# Degree-1 fit of the residuals against the literature vsini.
coeffs = np.polyfit(y_lit, residuals, 1)
x_fit = np.linspace(y_lit.min(), y_lit.max(), 100)
y_fit = np.polyval(coeffs, x_fit)
fit_label = f'Fit: y = {coeffs[0]:.2f}x + {coeffs[1]:.2f}'

# Points, fitted line and zero reference; no star labels in this version.
plt.figure(figsize=(10, 6))
plt.scatter(y_lit, residuals, color='hotpink', alpha=0.7, label='Residuals')
plt.plot(x_fit, y_fit, 'r--', label=fit_label)
plt.axhline(0, color='gray', linestyle='--')

# Restrict the residual axis to -1 ... 1.
plt.ylim(-1, 1)

plt.title('Residuals vs Literature vsini with Linear Fit')
plt.xlabel('Literature vsini (km/s)')
plt.ylabel('Residuals (Lit. vsini - Calculated vsini) (km/s)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()


### Residuals vs Literature vsini (no fit line)

In [None]:
# Residuals against the literature vsini without a fitted line. y_lit and
# residuals are not computed here; they are whatever an earlier cell left
# in the notebook namespace.
plt.figure(figsize=(8, 4))
marker_style = dict(color='hotpink', alpha=0.9, label='Residuals')
plt.scatter(y_lit, residuals, **marker_style)

# Zero reference and a residual axis restricted to -1 ... 1.
plt.axhline(0, color='red', linestyle='--')
plt.ylim(-1, 1)

plt.title('Residuals vs Literature vsini')
plt.xlabel('Literature vsini (km/s)')
plt.ylabel('Residuals (Lit. vsini - Calculated vsini) (km/s)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()


### Residuals vs Weighted Median FWHM with Linear Fit

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Rows need both vsini values, the FWHM and a star name (for the labels).
calc_col = 'calculated vsini, Km/s'
lit_col = 'Lit. values of vsini'
fwhm_col = 'weighted median (FWHM), Km/s'
df_res = df.dropna(subset=[calc_col, lit_col, fwhm_col, 'Star Name'])

# Residual = literature value minus calculated value.
fwhm = df_res[fwhm_col]
y_calc = df_res[calc_col]
y_lit = df_res[lit_col]
residuals = y_lit - y_calc
names = df_res['Star Name']

# Degree-1 fit of the residuals against the FWHM.
coeffs = np.polyfit(fwhm, residuals, 1)
x_fit = np.linspace(fwhm.min(), fwhm.max(), 100)
y_fit = np.polyval(coeffs, x_fit)
fit_label = f'Fit: y = {coeffs[0]:.2f}x + {coeffs[1]:.2f}'

plt.figure(figsize=(8, 6))
plt.scatter(fwhm, residuals, color='green', alpha=0.7, label='Residuals')
plt.plot(x_fit, y_fit, 'r--', label=fit_label)

# Reference line at zero residual.
plt.axhline(0, color='gray', linestyle='--')

# Star names, nudged slightly away from their markers.
for fwhm_val, res_val, star in zip(fwhm, residuals, names):
    plt.text(fwhm_val + 0.03, res_val + 0.03, star, fontsize=8, alpha=0.7)

plt.title('Residuals vs Weighted Median FWHM with Linear Fit')
plt.xlabel('Weighted Median FWHM (km/s)')
plt.ylabel('Residuals (Lit. vsini - Calculated vsini) (km/s)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()


### Plot for Literature vs. Calculated values with error bars

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import pearsonr

# Rows need both vsini values and the error on the calculated one.
calc_col = 'calculated vsini, Km/s'
lit_col = 'Lit. values of vsini'
calc_err_col = 'error (vsini), Km/s'
df_plot = df.dropna(subset=[calc_col, lit_col, calc_err_col])

# Calculated vsini on x (with its error), literature vsini on y (no error).
x = df_plot[calc_col]
y = df_plot[lit_col]
xerr = df_plot[calc_err_col]
yerr = None

# Degree-1 fit, evaluated on 100 points over the calculated-vsini range.
coeffs = np.polyfit(x, y, 1)
slope, intercept = coeffs
x_fit = np.linspace(x.min(), x.max(), 100)
y_fit = np.polyval(coeffs, x_fit)
equation = f"y = {slope:.2f}x + {intercept:.2f}"

# Correlation statistics on the same rows.
r_value, p_value = pearsonr(x, y)
r_squared = r_value ** 2
N = len(x)

plt.figure(figsize=(8, 6))
plt.errorbar(x, y, xerr=xerr, yerr=yerr, fmt='o',
             ecolor='gray', capsize=3, color='black',
             label='Stars with errors (x only)')

# Star names next to their markers, when the column is available.
if 'Star Name' in df_plot.columns:
    for calc_val, lit_val, star in zip(x, y, df_plot['Star Name']):
        plt.text(calc_val + 0.03,
                 lit_val + 0.03,
                 star, fontsize=8, alpha=0.6)

# Fitted line.
plt.plot(x_fit, y_fit, 'gray', linestyle='--',
         label=f'Linear fit: {equation}')

plt.title('Literature vsini vs Calculated vsini')
plt.xlabel('Calculated v sin i (km/s)')
plt.ylabel('Literature v sin i (km/s)')
plt.grid(True)
plt.legend()

# Statistics box in the upper-left corner of the axes.
stats_text = "\n".join([
    f"N = {N}",
    f"Pearson r = {r_value:.2f}",
    f"p-value = {p_value:.2g}",
    f"R² = {r_squared:.2f}",
    equation,
])
box_style = dict(boxstyle="round,pad=0.4", facecolor='lavender', edgecolor='gray')
plt.annotate(stats_text,
             xy=(0.05, 0.95), xycoords='axes fraction',
             fontsize=11, verticalalignment='top',
             bbox=box_style)

plt.tight_layout()
plt.show()
