## Comparisons

## 1. Rod Data (Cyan)
Pure tungsten and Rieth tungsten with WL10 alloy
Goal - (1) demonstrate trend + effect of alloying for rod tungsten
        (2) lowest temperature available creep data


## 2. Wire Data (Green)
Pure tungsten, K-doped tungsten
Goal - Demonstrate effect of 
    (1) K-doping
    (2) Recrystallization

## 3. Sheet data (Red)
Pure tungsten, sintered and W-25% Re alloy
Goals - (1) demonstrate broad trend for tungsten with most plentiful data sources  
        (2) Show effect of W-25% Re alloying

In [62]:
import numpy as np
import pandas as pd
from statsmodels.regression.linear_model import OLS, OLSResults, PredictionResults
from statsmodels.tools import add_constant
from matplotlib import pyplot as plt
from typing import Callable
from abc import ABC,abstractmethod
from scipy.stats.distributions import t as t_dist
import sys
sys.path.append('../')
from common_util import ProjectPaths,setup_axis_default,setup_plotting_format
import re
from sklearn.preprocessing import PolynomialFeatures
from scipy.optimize import minimize_scalar

# Project path registry from common_util; `paths.CREEP_DATA` is used later in
# this notebook to locate the creep data files.
paths = ProjectPaths()
# Project-wide plotting setup from common_util
# (presumably configures matplotlib defaults -- confirm in common_util).
setup_plotting_format()

# Multiplicative unit-conversion factors.
# 1 psi = 0.00689476 MPa
PSI_TO_MPA = 0.00689476
# 1 hour = 3600 s
HOURS_TO_SECONDS = 3600
# 1 kgf/mm^2 = 9.80665 MPa
KGMM2_TO_MPA = 9.80665 


class LarsonMiller(ABC):

    r"""
    Base class for Larson Miller models. The equation is:

    $$
    Z = T (C + \log(t))
    S_t = f(Z;\theta)
    $$

    where $Z$ is the Larson-Miller parameter, $C$ is the Larson-Miller
    constant, $f$ is an arbitrary function and $\theta$ are its parameters.
    The logarithm used throughout is the natural logarithm (``np.log``).

    Attributes
    ----------
    C : float or None
        Larson-Miller constant; ``None`` until supplied or fitted.
    SE_C : float or None
        Standard error of ``C``; ``None`` until a subclass estimates it.
    results : object or None
        Fitted regression results stored by the subclass.
    prediction_results : object or None
        Result of the most recent prediction call made by a subclass.
    """

    def __init__(self, C: "float | None" = None,
                       results: "OLSResults | None" = None):
        self.C = C
        self.SE_C = None
        self.results = results
        self.prediction_results = None

    @staticmethod
    def lmp(t: np.ndarray, T: np.ndarray, C: float) -> np.ndarray:
        """
        Evaluate the Larson Miller parameter ``T * (C + log(t))``.

        Parameters
        ----------
        t : np.ndarray
            The time to failure.
        T : np.ndarray
            The temperature.
        C : float
            The Larson Miller constant.

        Returns
        -------
        np.ndarray
            The Larson Miller parameter for each (t, T) pair.
        """
        return T * (C + np.log(t))

    def lmp_confint(self, t: np.ndarray, T: np.ndarray, alpha: float = 0.05) -> np.ndarray:
        """
        Compute the confidence intervals for the Larson Miller parameter.

        The interval is obtained by evaluating the parameter at the lower and
        upper confidence bounds of ``C`` (``C -/+ t_crit * SE_C``).

        Parameters
        ----------
        t : np.ndarray
            The time to failure.
        T : np.ndarray
            The temperature.
        alpha : float, optional
            The significance level. Default is 0.05.

        Returns
        -------
        np.ndarray
            Array whose first column is the lower bound and whose second
            column is the upper bound, one row per observation.

        Raises
        ------
        ValueError
            If ``C`` or ``SE_C`` has not been set yet.
        """
        if self.C is None or self.SE_C is None:
            raise ValueError(
                'C and SE_C must be estimated before computing confidence intervals'
            )
        # Keep the critical value in its own name: re-using `t` here would
        # replace the time-to-failure array that lmp() needs below.
        t_crit = t_dist.ppf(1 - alpha / 2, np.size(t) - 1)
        half_width = t_crit * self.SE_C
        lower = self.lmp(t, T, self.C - half_width)
        upper = self.lmp(t, T, self.C + half_width)
        return np.array([lower, upper]).T

    def get_lmp(self, t: np.ndarray,
                      T: np.ndarray,
                      conf_int: bool = False,
                      alpha: float = 0.05) -> np.ndarray:
        r"""
        Compute the Larson Miller parameter using the stored constant ``C``.

        Parameters
        ----------
        t : np.ndarray
            The time to failure.
        T : np.ndarray
            The temperature.
        conf_int : bool, optional
            If True, also return the confidence interval from
            ``lmp_confint``. Default is False.
        alpha : float, optional
            The significance level used when ``conf_int`` is True.

        Returns
        -------
        np.ndarray or tuple
            The Larson Miller parameter, or the pair
            ``(parameter, confidence_interval)`` when ``conf_int`` is True.
        """
        Z = self.lmp(t, T, self.C)
        if conf_int:
            return Z, self.lmp_confint(t, T, alpha=alpha)
        return Z

    @abstractmethod
    def predict_from_lmp(self, Z: np.ndarray,
                               conf_int: bool = False,
                               alpha: "float | None" = None) -> np.ndarray:
        """
        Predict the stress from values of the Larson Miller parameter.

        Parameters
        ----------
        Z : np.ndarray
            The Larson Miller parameter values.
        conf_int : bool, optional
            Whether to also return an interval for the prediction.
        alpha : float, optional
            The significance level for the interval.
        """
        pass

    @abstractmethod
    def fit(self, t: np.ndarray,
                  T: np.ndarray,
                  S_t: np.ndarray,
                  **kwargs):
        """
        Fit the model to observed data.

        Parameters
        ----------
        t : np.ndarray
            The time to failure.
        T : np.ndarray
            The temperature.
        S_t : np.ndarray
            The stress at time t.
        kwargs : additional arguments for the underlying regression.
        """
        pass

    @abstractmethod
    def param_confint(self, alpha: float = 0.05) -> np.ndarray:
        """
        Compute the confidence intervals for the parameters.

        Parameters
        ----------
        alpha : float, optional
            The significance level. Default is 0.05.

        Returns
        -------
        np.ndarray
            The confidence intervals for the parameters.
        """
        pass

    @abstractmethod
    def get_prediction(self, t: np.ndarray, T: np.ndarray) -> "PredictionResults":
        """
        Return prediction results for the given times and temperatures.

        Parameters
        ----------
        t : np.ndarray
            The time to failure.
        T : np.ndarray
            The temperature.
        """
        pass


class LarsonMillerSLR(LarsonMiller):
    r"""
    Fit the Larson Miller model with a linear master curve:

    $$
    Z = T (C + \log(t))
    S_t = \beta_0 + \beta_1 Z = \beta_0 + \beta_1 C T + \beta_1 T \log(t)
    $$

    The regression is linear in $a = \beta_1 C$ (coefficient of $T$) and
    $\beta_1$ (coefficient of $T \log(t)$), so $\hat{C} = \hat{a} / \hat{\beta}_1$.
    The variance of $\hat{C}$ is approximated with the delta method:

    $$
    \mathbb{V}\text{ar}(\hat{C}) \approx
        \frac{1}{\hat{\beta}_1^2} \sigma_{a}^2
        + \frac{\hat{C}^2}{\hat{\beta}_1^2} \sigma_{\beta_1}^2
        - 2 \frac{\hat{C}}{\hat{\beta}_1^2} \sigma_{a \beta_1}
    $$
    """

    @staticmethod
    def _design_matrix(t, T) -> pd.DataFrame:
        """Build the regressors ['const', 'Temperature', 'log(t)*T'] for the given t and T."""
        t_arr, T_arr = np.broadcast_arrays(
            np.atleast_1d(np.asarray(t, dtype=float)),
            np.atleast_1d(np.asarray(T, dtype=float)),
        )
        features = pd.DataFrame({
            'Temperature': T_arr,
            'log(t)*T': np.log(t_arr) * T_arr,
        })
        # has_constant='add' forces the intercept column even when a regressor
        # happens to be constant (e.g. a single row or isothermal data); the
        # default 'skip' would silently drop it and shift the column layout.
        return add_constant(features, has_constant='add')

    def fit(self, t: np.ndarray,
            T: np.ndarray,
            S_t: np.ndarray,
            **kwargs) -> "LarsonMillerSLR":
        r"""
        Fit the Larson Miller model using ordinary least squares regression.

        Stores the fitted results in ``self.results``, the estimated constant
        in ``self.C``, its delta-method standard error in ``self.SE_C`` and
        the mean squared temperature in ``self.mean_T2``.

        Parameters
        ----------
        t : np.ndarray
            The time to failure (array or pandas Series).
        T : np.ndarray
            The temperature (array or pandas Series).
        S_t : np.ndarray
            The stress at time t (array or pandas Series).
        kwargs : additional arguments passed to ``OLS(...).fit``.

        Returns
        -------
        LarsonMillerSLR
            The fitted model (``self``).
        """
        X = self._design_matrix(t, T)
        self.results = OLS(np.asarray(S_t, dtype=float), X).fit(**kwargs)

        a_hat = self.results.params['Temperature']
        beta_1 = self.results.params['log(t)*T']
        self.C = a_hat / beta_1

        # Delta method for the ratio C = a / beta_1.
        cov = self.results.cov_params()
        var_C = (
            cov.loc['Temperature', 'Temperature']
            + self.C**2 * cov.loc['log(t)*T', 'log(t)*T']
            - 2 * self.C * cov.loc['Temperature', 'log(t)*T']
        ) / beta_1**2
        self.SE_C = np.sqrt(var_C)
        self.mean_T2 = np.mean(np.asarray(T, dtype=float)**2)
        return self

    def param_confint(self, alpha = 0.05):
        """
        Compute the confidence intervals for ``const``, ``beta_1`` and ``C``.

        Parameters
        ----------
        alpha : float, optional
            The significance level. Default is 0.05.

        Returns
        -------
        pd.DataFrame
            One row per parameter (``const``, ``beta_1``, ``C``) with the
            lower and upper bound as columns.
        """
        t_crit = t_dist.ppf(1 - alpha / 2, self.results.df_resid)
        # Relabel the slope row so it keeps its actual interval rather than
        # being overwritten with the point estimate, and drop the raw
        # coefficient of T, which is reported through C instead.
        params = (
            self.results.conf_int(alpha)
            .rename(index={'log(t)*T': 'beta_1'})
            .drop(index='Temperature')
        )
        params.loc['C', :] = [self.C - t_crit * self.SE_C, self.C + t_crit * self.SE_C]
        return params

    def lmp_confint(self,t: np.ndarray,T: np.ndarray,alpha: float = 0.05) -> np.ndarray:
        """
        Compute the confidence intervals for the Larson Miller parameter.

        The parameter is evaluated at the lower and upper confidence bounds of
        ``C``; the critical value uses the residual degrees of freedom of the
        fit, matching ``param_confint``.

        Parameters
        ----------
        t : np.ndarray
            The time to failure.
        T : np.ndarray
            The temperature.
        alpha : float, optional
            The significance level. Default is 0.05.

        Returns
        -------
        np.ndarray
            Array whose first column is the lower bound and whose second
            column is the upper bound, one row per observation.
        """
        # The critical value must not be stored in `t`: that name holds the
        # time-to-failure array passed to lmp() below.
        t_crit = t_dist.ppf(1 - alpha / 2, self.results.df_resid)
        half_width = t_crit * self.SE_C
        return np.array([
            self.lmp(t, T, self.C - half_width),
            self.lmp(t, T, self.C + half_width),
        ]).T

    def predict_from_lmp(self, Z: np.ndarray,
                               conf_int: bool = False,
                               forecast: bool = False,
                               alpha: float = 0.05) -> np.ndarray:
        """
        Predict the stress from values of the Larson Miller parameter.

        Parameters
        ----------
        Z : np.ndarray
            The Larson Miller parameter values.
        conf_int : bool, optional
            If True, also return an interval for the prediction.
        forecast : bool, optional
            If True, add the residual mean square to the variance so the
            interval covers a new observation rather than the mean.
        alpha : float, optional
            The significance level. Default is 0.05.

        Returns
        -------
        np.ndarray or tuple
            The predicted stress, or ``(S_t, bounds)`` when ``conf_int`` is
            True, where ``bounds[0]`` is the lower and ``bounds[1]`` the
            upper limit.
        """
        beta_0 = self.results.params['const']
        beta_1 = self.results.params['log(t)*T']
        S_t = beta_0 + beta_1 * Z
        if not conf_int:
            return S_t

        bse = self.results.bse
        cov_01 = self.results.cov_params().loc['const', 'log(t)*T']
        # NOTE(review): the last term scales Var(C) by Var(beta_1) and the mean
        # of T^2; a first-order propagation of C would use beta_1^2 instead --
        # confirm which form is intended.
        var_St = (
            bse['const']**2
            + Z**2 * bse['log(t)*T']**2
            + 2 * Z * cov_01
            + self.mean_T2 * self.SE_C**2 * bse['log(t)*T']**2
        )
        if forecast:
            var_St = var_St + self.results.mse_resid

        t_crit = t_dist.ppf(1 - alpha / 2, self.results.df_resid)
        half_width = t_crit * np.sqrt(var_St)
        return S_t, np.array([S_t - half_width, S_t + half_width])

    def get_prediction(self, t, T) -> "PredictionResults":
        """
        Return the OLS prediction results for the given times and temperatures.

        Parameters
        ----------
        t : np.ndarray
            The time to failure.
        T : np.ndarray
            The temperature.

        Returns
        -------
        PredictionResults
            Result of ``self.results.get_prediction`` on the design matrix
            built from ``t`` and ``T``; also stored in
            ``self.prediction_results``.
        """
        self.prediction_results = self.results.get_prediction(self._design_matrix(t, T))
        return self.prediction_results


class LarsonMillerPR(LarsonMiller):

    r"""
    Fit the Larson Miller model with a polynomial master curve:

    $$
    Z = T (C + \log(t))
    S_t = \beta_0 + \beta_1 Z + \beta_2 Z^2 + \dots + \beta_p Z^p
    $$

    $C$ is found by a bounded scalar minimisation of the residual sum of
    squares; its uncertainty is approximated with a bootstrap procedure.
    """

    def fit(self,
            t: np.ndarray,
            T: np.ndarray,
            S_t: np.ndarray,
            deg: int = 1,
            C_bounds = (1,100),
            **kwargs) -> "LarsonMillerPR":
        r"""
        Fit the model and store the data for later bootstrap resampling.

        Parameters
        ----------
        t : np.ndarray
            The time to failure (array or pandas Series).
        T : np.ndarray
            The temperature (array or pandas Series).
        S_t : np.ndarray
            The stress at time t (array or pandas Series).
        deg : int, optional
            The degree of the polynomial to fit. Default is 1 (linear).
        C_bounds : tuple, optional
            Search interval for the Larson Miller constant.
        kwargs : additional arguments passed to ``OLS(...).fit``.

        Returns
        -------
        LarsonMillerPR
            The fitted model (``self``).
        """
        self.t = np.asarray(t, dtype=float)
        self.T = np.asarray(T, dtype=float)
        self.S_t = np.asarray(S_t, dtype=float)
        self.deg = deg
        # Remember the search interval and OLS options so the bootstrap refits
        # are performed under the same settings as the original fit.
        self.C_bounds = C_bounds
        self._fit_kwargs = dict(kwargs)
        self.C, self.results = self._fit(self.t, self.T, self.S_t, self.deg, C_bounds, **kwargs)
        return self

    def _poly_features(self, Z) -> np.ndarray:
        """Polynomial design matrix of degree ``self.deg`` for a 1-D array of Z values."""
        Z = np.asarray(Z, dtype=float).reshape(-1, 1)
        return PolynomialFeatures(self.deg).fit_transform(Z)

    def _fit(self,
            t: np.ndarray,
            T: np.ndarray,
            S_t: np.ndarray,
            deg: int,
            C_bounds = (1,100),
            **kwargs) -> tuple:
        r"""
        Fit the Larson Miller model using polynomial regression.

        Parameters
        ----------
        t : np.ndarray
            The time to failure.
        T : np.ndarray
            The temperature.
        S_t : np.ndarray
            The stress at time t.
        deg : int
            The degree of the polynomial to fit.
        C_bounds : tuple, optional
            Search interval for the Larson Miller constant.
        kwargs : additional arguments for OLS.

        Returns
        -------
        tuple
            ``(C, results)``: the optimal constant and the fitted OLS results.
        """
        def _design(C: float) -> np.ndarray:
            return PolynomialFeatures(deg).fit_transform(self.lmp(t, T, C)[:, np.newaxis])

        def _ssr(C: float) -> float:
            X = _design(C)
            coef = np.linalg.lstsq(X, S_t, rcond=None)[0]
            # Compute the residual sum of squares explicitly: the residual
            # array returned by lstsq is empty for rank-deficient or
            # under-determined designs, which would make the objective 0.
            resid = S_t - X @ coef
            return float(resid @ resid)

        # Find the optimal C using a scalar minimization
        opt = minimize_scalar(_ssr, bounds=C_bounds, method='bounded')
        C = opt.x
        results = OLS(S_t, _design(C)).fit(**kwargs)
        return C, results

    def _bootstrap_fits(self, n: int, seed: int):
        """Yield ``(C, results)`` for ``n`` refits on rows resampled with replacement."""
        # A private RandomState yields the same draws as seeding the global
        # generator would, without altering global random state.
        rng = np.random.RandomState(seed)
        n_obs = self.t.shape[0]
        selection = rng.choice(np.arange(n_obs), size=(n, n_obs), replace=True)
        for rows in selection:
            yield self._fit(self.t[rows], self.T[rows], self.S_t[rows],
                            self.deg, self.C_bounds, **self._fit_kwargs)

    @staticmethod
    def _percentile_interval(samples: np.ndarray, alpha: float) -> np.ndarray:
        """Lower/upper percentile bounds over axis 0 (the bootstrap replicates)."""
        return np.percentile(samples, [100 * alpha / 2, 100 * (1 - alpha / 2)], axis=0)

    def param_confint(self, n: int = 100,
                            alpha = 0.05,
                            seed = 4):
        """
        Bootstrap percentile confidence intervals for ``C`` and the coefficients.

        Also stores the bootstrap standard deviation of ``C`` in ``self.SE_C``
        and the bootstrap covariance of the coefficients in
        ``self.cov_params``.

        Parameters
        ----------
        n : int, optional
            Number of bootstrap replicates. Default is 100.
        alpha : float, optional
            The significance level. Default is 0.05.
        seed : int, optional
            Seed for the resampling.

        Returns
        -------
        np.ndarray
            Array with two rows (lower, upper); the first column is ``C`` and
            the remaining columns are the polynomial coefficients.
        """
        C = np.empty(n)
        param = np.empty((n, self.results.params.shape[0]))
        for i, (C_i, results_i) in enumerate(self._bootstrap_fits(n, seed)):
            C[i] = C_i
            param[i] = results_i.params

        self.SE_C = np.std(C)
        # Rows are replicates and columns are coefficients, so rowvar=False is
        # required to get a (p x p) coefficient covariance.
        self.cov_params = np.cov(param, rowvar=False)
        samples = np.concatenate([C[:, np.newaxis], param], axis=1)
        return self._percentile_interval(samples, alpha)

    def get_prediction(self, t, T):
        """Not available for this model; always raises ``NotImplementedError``."""
        raise NotImplementedError('get prediction not implemented in LarsonMillerPR')

    def get_prediction_mean(self, t, T):
        """
        Return the OLS prediction results at the fitted ``C``.

        Parameters
        ----------
        t : np.ndarray
            The time to failure.
        T : np.ndarray
            The temperature.

        Returns
        -------
        PredictionResults
            Result of ``self.results.get_prediction``; also stored in
            ``self.prediction_results``.
        """
        self.prediction_results = self.results.get_prediction(
            self._poly_features(self.lmp(t, T, self.C))
        )
        return self.prediction_results

    def get_prediction_confint(self, tnew: np.ndarray, Tnew: np.ndarray,
                               n: int = 100,
                               alpha = 0.05, seed: int = 4):
        """
        Bootstrap percentile interval for the predicted stress at new (t, T).

        Each replicate refits both ``C`` and the coefficients and predicts at
        the Larson Miller parameter computed with that replicate's ``C``.

        Parameters
        ----------
        tnew : np.ndarray
            Times at which to predict.
        Tnew : np.ndarray
            Temperatures at which to predict.
        n : int, optional
            Number of bootstrap replicates. Default is 100.
        alpha : float, optional
            The significance level. Default is 0.05.
        seed : int, optional
            Seed for the resampling.

        Returns
        -------
        np.ndarray
            Array with two rows (lower, upper) and one column per new point.
        """
        prediction = np.empty((n, tnew.shape[0]))
        for i, (C_i, results_i) in enumerate(self._bootstrap_fits(n, seed)):
            prediction[i] = results_i.predict(self._poly_features(self.lmp(tnew, Tnew, C_i)))
        return self._percentile_interval(prediction, alpha)

    def _bootstrap_conf_int_from_lmp(self, Z: np.ndarray,
                                           n: int,
                                           alpha: float= 0.05,
                                           seed: int = 4) -> np.ndarray:
        """
        Bootstrap percentile interval for the predicted stress at fixed Z.

        Only the coefficients of each replicate are used; ``Z`` is taken as
        given and is not recomputed with the replicate's ``C``.
        """
        X = self._poly_features(Z)
        prediction = np.empty((n, X.shape[0]))
        for i, (_, results_i) in enumerate(self._bootstrap_fits(n, seed)):
            prediction[i] = results_i.predict(X)
        return self._percentile_interval(prediction, alpha)

    def predict_from_lmp(self, Z, n: int = 100,conf_int = False, alpha = 0.05, seed = 4):
        """
        Predict the stress from values of the Larson Miller parameter.

        Parameters
        ----------
        Z : np.ndarray
            The Larson Miller parameter values.
        n : int, optional
            Number of bootstrap replicates used when ``conf_int`` is True.
        conf_int : bool, optional
            If True, also return a bootstrap percentile interval.
        alpha : float, optional
            The significance level. Default is 0.05.
        seed : int, optional
            Seed for the resampling.

        Returns
        -------
        np.ndarray or tuple
            The predicted stress, or ``(S_t, bounds)`` when ``conf_int`` is
            True.
        """
        S_t = self.results.predict(self._poly_features(Z))
        if conf_int:
            return S_t, self._bootstrap_conf_int_from_lmp(Z, n, alpha=alpha, seed = seed)
        return S_t
    

class LarsonMillerPowerLaw(LarsonMiller):

    r"""
    Larson Miller model with a power-law master curve (placeholder, not
    implemented yet):

    $$
    Z = T (C + \log(t))
    S_t = A_1 Z^{\beta_1} \implies \log(S_t) = \log(A_1) + \beta_1 \log(Z)
    \log(S_t) = \beta_0 + \beta_1 \log(Z) = \beta_0 + \beta_1 (\log(T) + \log(C + \log(t)))
    $$

    The variance in $\hat{C}$ is intended to be approximated using a
    bootstrap procedure.
    """

    def fit(self, t: np.ndarray = None,
                  T: np.ndarray = None,
                  S_t: np.ndarray = None,
                  **kwargs):
        """
        Placeholder for the power-law fit.

        The parameters mirror the abstract ``fit`` of the base class; they are
        optional so a bare ``fit()`` call is still accepted by the signature.

        Raises
        ------
        NotImplementedError
            Always, instead of silently returning without fitting anything.
        """
        raise NotImplementedError('fit not implemented in LarsonMillerPowerLaw')

        


In [None]:
# --- Load the W-25% Re sheet data and convert to the units used by the fit ---
data = pd.read_csv(paths.CREEP_DATA / 'GEMP-457A-1002/Re25WSheet.csv',index_col = None,header = 0)
print(data.columns  )
data['Stress [MPa]'] = data['Stress (kg/mm²)']*KGMM2_TO_MPA
data['Rupture Time [s]'] = data['Rupture Time (hr)']*HOURS_TO_SECONDS
# Only rows missing a quantity the fit actually uses are discarded; a blanket
# dropna() would also throw away rupture points that merely lack one of the
# unused "Time to x% Strain" entries.
data.dropna(subset = ['Temperature (°C)','Stress [MPa]','Rupture Time [s]'],inplace = True)

# NOTE(review): the Larson-Miller parameter is conventionally defined with an
# absolute temperature, while this column is in degrees Celsius -- confirm
# whether a conversion to kelvin is intended before interpreting C.
model = LarsonMillerSLR()
model.fit(
    t = data['Rupture Time [s]'],
    T = data['Temperature (°C)'],
    S_t = data['Stress [MPa]'],
)

# Larson-Miller parameter of each observation (computed once) together with
# its confidence interval.
lmp,lmp_conf = model.get_lmp(
    t = data['Rupture Time [s]'],
    T = data['Temperature (°C)'],
    conf_int = True
)

# Evaluate the fitted master curve on an even grid spanning the observed range.
lmp_plot = np.linspace(
    lmp.min(),
    lmp.max(),
    100
)

St_plot,St_conf_int  = model.predict_from_lmp(
    Z = lmp_plot,
    conf_int = True,
    alpha = 0.05
)

# Keep and show the parameter intervals instead of discarding the result.
param_conf_int = model.param_confint(alpha = 0.05)
print(param_conf_int)

fig, ax = plt.subplots(1, 1, figsize=(8, 6))
setup_axis_default(ax)

ax.plot(lmp_plot,St_plot, label = 'Predicted', color = 'blue')
# Confidence band, currently disabled:
# ax.fill_between(
#     lmp_plot,
#     St_conf_int[0],
#     St_conf_int[1],
#     color = 'blue',
#     alpha = 0.2,
#     label = '95% CI'
# )
#color_array = plt.get_cmap('viridis')(scaled_strain_rate)
ax.scatter(
    lmp, data['Stress [MPa]'],
    label = 'Observed',
    color = 'blue',
    s = 15,
    edgecolor = 'k'
)
# Horizontal error bars on Z, currently disabled:
# ax.errorbar(
#     lmp, data['Stress [MPa]'],
#     xerr = np.array([lmp - lmp_conf[:,0],lmp-  lmp_conf[:,1]]),
#     fmt = 'o',
#     color = 'red',
#     label = 'Observed'
# )

ax.set_xlabel("$Z$",fontsize = 12)    
ax.set_ylabel(r"\textbf{$S_t$ [MPa]}",fontsize = 12)

fig.tight_layout()


Index(['Temperature (°C)', 'Stress (kg/mm²)', 'Time to 0.2% Strain (hr)',
       'Time to 0.5% Strain (hr)', 'Time to 1% Strain (hr)',
       'Time to 2% Strain (hr)', 'Time to 3% Strain (hr)',
       'Time to 5% Strain (hr)', 'Time to 10% Strain (hr)',
       'Rupture Time (hr)', 'Rupture Strain (%)'],
      dtype='object')


KeyError: 'Linear Creep Rate (min^-1)'