In [1]:
# 기본 라이브러리
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [4]:
# 🧪 통계 검정 (scipy) 
from scipy import stats  

# t-검정
# stats.ttest_1samp(sample, popmean)   # 일표본 t검정
# stats.ttest_rel(sample1, sample2)    # 대응표본 t검정
# stats.ttest_ind(group1, group2)      # 독립표본 t검정

# 카이제곱 검정
# stats.chisquare(f_obs, f_exp)        # 적합도 검정
# stats.chi2_contingency(table)        # 독립성/동질성 검정

# 상관분석
# stats.pearsonr(x, y)                 # 피어슨 상관계수
# stats.spearmanr(x, y)                # 스피어만 상관계수
# stats.kendalltau(x, y)               # 켄달 상관계수

# 정규성 검정
# stats.shapiro(data)                  # 샤피로-윌크 검정
# stats.normaltest(data)               # D’Agostino and Pearson 검정

# 분산 동질성 검정
# stats.levene(group1, group2, ...)    # Levene’s test
# stats.bartlett(group1, group2, ...)  # Bartlett’s test

# 🧪 간편 통계 검정
import pingouin as pg  # 예: pg.ttest(), pg.anova(), pg.corr()

# 📈 통계 모델링
import statsmodels.api as sm
import statsmodels.formula.api as smf  # 예: smf.ols('y ~ x', data=df)

In [None]:
# 머신러닝 모델링

#데이터 전처리
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score

from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder

# Missing-value imputation
# IterativeImputer is still experimental in scikit-learn: importing it from
# sklearn.impute raises ImportError unless the feature is explicitly enabled
# first through the sklearn.experimental import below.
from sklearn.experimental import enable_iterative_imputer  # noqa: F401
from sklearn.impute import SimpleImputer, KNNImputer, IterativeImputer

# 특성 선택 (Feature Selection)
from sklearn.feature_selection import (
    SelectKBest, SelectPercentile, chi2, f_classif, f_regression,
    RFE, RFECV,  # Recursive Feature Elimination (Backward Selection)
    SelectFromModel,
    SequentialFeatureSelector  # Forward/Backward Selection
)

# 차원 축소
from sklearn.decomposition import PCA

# 선형모델
from sklearn.linear_model import (
    LinearRegression, LogisticRegression, Ridge, Lasso,
    ElasticNet, SGDClassifier, SGDRegressor
)

# 트리모델
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor

# knn
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor

# 나이브 베이즈
from sklearn.naive_bayes import GaussianNB, MultinomialNB, BernoulliNB

# SVM 계열
from sklearn.svm import SVC, SVR, LinearSVC, LinearSVR

# 클러스터링
from sklearn.cluster import KMeans, DBSCAN, AgglomerativeClustering, SpectralClustering

# 앙상블
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.ensemble import (
    ExtraTreesClassifier, ExtraTreesRegressor,
    AdaBoostClassifier, AdaBoostRegressor,
    BaggingClassifier, BaggingRegressor,
    StackingClassifier, StackingRegressor,
    VotingClassifier, VotingRegressor
)

# 부스팅
# XGBoost  clf = XGBClassifier(n_estimators=100, learning_rate=0.1, max_depth=3)
from xgboost import XGBClassifier, XGBRegressor

# LightGBM
from lightgbm import LGBMClassifier, LGBMRegressor

# CatBoost
from catboost import CatBoostClassifier, CatBoostRegressor

# 평가
from sklearn.model_selection import KFold, StratifiedKFold, cross_validate

from sklearn.metrics import (accuracy_score, confusion_matrix, roc_auc_score, mean_squared_error,
    classification_report, precision_score, recall_score, f1_score,
    r2_score, mean_absolute_error, mean_absolute_percentage_error
)

In [5]:
# Most scikit-learn estimators are used in the form shown below:
# import the estimator class, instantiate it, then call fit / predict.
from sklearn.linear_model import BayesianRidge
model = BayesianRidge()
# The two calls below are left commented out as a usage template;
# x_train, y_train and x_test are placeholders that are not defined in the visible cells.
#model.fit(x_train,y_train)
#model.predict(x_test)

In [None]:
# statsmodels.api generally follows the pattern below, but the details differ from object to object
import statsmodels.api as sm


#model = sm.OLS(y, X)     # ① 모델 생성
#result = model.fit()     # ② 학습
#print(result.summary())  # ③ 요약 출력

In [7]:
import statsmodels.api as sm
from sklearn.datasets import load_diabetes

# Load the diabetes dataset bundled with scikit-learn.
diabetes = load_diabetes()
y = diabetes.target

# Add the intercept (constant) column to the features -- important for sm.OLS!
# (Original note: with `lm` the intercept does not have to be added manually.)
X = sm.add_constant(diabetes.data)

# Build the OLS model first, then fit it to obtain the results object.
model = sm.OLS(endog=y, exog=X)
result = model.fit()

# Uncomment to print the regression summary:
#print(result.summary())