<a href="https://colab.research.google.com/github/treekeaw1/-mana-bento-web/blob/main/Untitledv%E0%B9%81%E0%B8%9E%E0%B8%97%E0%B9%80%E0%B8%97%E0%B8%B4%E0%B9%88%E0%B8%99%E0%B9%84%E0%B8%A1%E0%B9%88%E0%B8%AA%E0%B8%B8%E0%B9%88%E0%B8%A114_21.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from io import BytesIO
from google.colab import files
from collections import Counter
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.preprocessing import StandardScaler
from itertools import product
from tqdm.notebook import tqdm
import warnings

warnings.filterwarnings('ignore')

class LottoPatternEngineV6:
    def __init__(self, window_size=7): # Default window size to 7 as per latest analysis
        """
        Set up an engine with empty models, scalers and dynamic number lists.

        Args:
            window_size: Number of preceding draws used to build one feature
                vector.
        """
        print("🚀 กำลังเริ่มต้น Lotto Pattern Engine v6.4 (Enhanced)...")

        # Data and readiness flag; `is_ready` is switched on by a successful load.
        self.df = pd.DataFrame()
        self.is_ready = False
        self.window_size = window_size

        # One classifier per digit position, plus one scaler per model family.
        self.models_6d, self.models_2d = {}, {}
        self.scaler_6d = StandardScaler()
        self.scaler_2d = StandardScaler()

        # Tunables for candidate generation and reporting.
        self.CANDIDATE_POOL_SIZE_6D = 50000
        self.TOP_N_PREDICTIONS = 5

        # Recomputed from history by `_update_dynamic_number_lists`.
        self.banned_sequences_6d = set()
        self.excluded_2d_numbers = set()
        self.hot_2d_numbers = set()

        # Blend weights used when scoring candidates.
        self.WEIGHTS_6D = {
            'ml_prob': 0.25,
            'pattern_match': 0.40,
            'dynamic': 0.25,
            'uniqueness': 0.10,
        }
        self.WEIGHTS_2D = {
            'ml_prob': 0.40,
            'hot_cold': 0.60,
        }

        # Human-readable label for each index (0-7) of the combined
        # "2-digit + 6-digit" string.
        position_labels = [
            'เลข 2 ตัวล่าง (หลักสิบ)',
            'เลข 2 ตัวล่าง (หลักหน่วย)',
            'รางวัลที่ 1 (หลักแสน)',
            'รางวัลที่ 1 (หลักหมื่น)',
            'รางวัลที่ 1 (หลักพัน)',
            'รางวัลที่ 1 (หลักร้อย)',
            'รางวัลที่ 1 (หลักสิบ)',
            'รางวัลที่ 1 (หลักหน่วย)',
        ]
        self.combined_digit_positions = dict(enumerate(position_labels))

    def load_and_prepare_data(self):
        """
        Ask the user to upload a CSV file, clean it and store it in ``self.df``.

        Processing steps:
        - Opens the Colab upload widget and reads the first uploaded file.
        - Skips the first physical row of the file and uses the next one as
          the header.
        - Renames the Thai column headers to ``date`` / ``six_digit`` /
          ``two_digit``.
        - Drops rows whose date or numbers cannot be parsed, and rows whose
          numbers fall outside 0-999999 / 0-99.
        - Zero-pads the numbers to fixed-width strings.
        - Appends the hard-coded 2025-06-16 draw unless that date is already
          present in the file.
        - Sorts the rows from oldest to newest.

        Returns:
            bool: True when data was loaded (``self.is_ready`` is set to True),
            False when no file was chosen or any error occurred.
        """
        print("📊 กรุณาคลิกปุ่มด้านล่างเพื่อเลือกไฟล์ CSV สำหรับวิเคราะห์")

        try:
            uploaded = files.upload()
            if not uploaded:
                print("❗ ไม่มีการเลือกไฟล์")
                return False

            # Only the first uploaded file is used.
            filepath, file_content = next(iter(uploaded.items()))

            print(f"กำลังโหลดและทำความสะอาดข้อมูลจากไฟล์: {filepath}")
            df = pd.read_csv(BytesIO(file_content), encoding='utf-8-sig', skiprows=[0], header=0)

            # Match headers case-insensitively and ignoring surrounding spaces.
            thai_col_mapping = {'วันที่': 'date', 'รางวัลที่ 1 (6 หลัก)': 'six_digit', 'เลข 2 ตัวล่าง': 'two_digit'}
            normalized_columns_map = {col.strip().lower(): col for col in df.columns}
            rename_dict = {
                normalized_columns_map[thai_name.lower()]: eng_name
                for thai_name, eng_name in thai_col_mapping.items()
                if thai_name.lower() in normalized_columns_map
            }
            df.rename(columns=rename_dict, inplace=True)

            required_cols = ['date', 'six_digit', 'two_digit']
            missing_cols = [col for col in required_cols if col not in df.columns]
            if missing_cols:
                raise ValueError(f"ไฟล์ CSV ไม่มีคอลัมน์ที่จำเป็น: {', '.join(missing_cols)}")

            df['date'] = pd.to_datetime(df['date'], errors='coerce')
            df.dropna(subset=['date'], inplace=True)

            df['six_digit'] = pd.to_numeric(df['six_digit'], errors='coerce')
            df['two_digit'] = pd.to_numeric(df['two_digit'], errors='coerce')
            df.dropna(subset=['six_digit', 'two_digit'], inplace=True)

            # Out-of-range values would not fit the fixed-width digit strings
            # that the positional features index into, so they are discarded.
            in_range = df['six_digit'].between(0, 999999) & df['two_digit'].between(0, 99)
            df = df[in_range].copy()

            df['six_digit'] = df['six_digit'].astype(int).astype(str).str.zfill(6)
            df['two_digit'] = df['two_digit'].astype(int).astype(str).str.zfill(2)

            # Hard-coded latest draw (16/06/2568 BE). It is appended only when
            # the file does not already contain that date, so the draw is never
            # duplicated. A real system would fetch this from an API instead.
            latest_draw_date = pd.to_datetime('2025-06-16')
            if not (df['date'] == latest_draw_date).any():
                new_df = pd.DataFrame(
                    {'date': [latest_draw_date], 'six_digit': ['507392'], 'two_digit': ['06']}
                )
                df = pd.concat([df, new_df], ignore_index=True)

            self.df = df.sort_values(by='date', ascending=True).reset_index(drop=True)
            print(f"✅ โหลดและอัปเดตข้อมูลสำเร็จ! จำนวนข้อมูลทั้งหมด: {len(self.df)} แถว")
            self.is_ready = True
            return True
        except Exception as e:
            # Top-level boundary for the interactive notebook: report and bail out.
            print(f"❌ เกิดข้อผิดพลาดในการโหลดข้อมูล: {e}")
            self.is_ready = False
            return False

    def _calculate_arrangement_patterns(self, number_str_6d):
        """Calculates arrangement patterns for a 6-digit number."""
        counts = Counter(number_str_6d)
        num_unique = len(counts)
        features = {}
        # one-repeat: one digit repeats twice, others unique (e.g., 123445)
        features['one_repeat'] = 1 if (num_unique == 5 and any(c == 2 for c in counts.values())) else 0
        # high-repeat: two digits repeat twice (e.g., 112234) or one digit repeats 3+ times (e.g., 111234)
        features['high_repeat'] = 1 if (any(c >= 3 for c in counts.values()) or \
                                        (num_unique == 4 and sum(1 for c in counts.values() if c == 2) == 2)) else 0
        # balanced-parity: equal number of even/odd digits (3 even, 3 odd)
        even_count = sum(1 for d in number_str_6d if int(d) % 2 == 0)
        features['balanced_parity'] = 1 if even_count == 3 else 0
        return features

    def _calculate_retention_patterns(self, current_combined_str, previous_combined_str):
        """Calculates retention (common digits) between two 8-digit combined strings."""
        common_digits_count = len(set(current_combined_str) & set(previous_combined_str))
        # This can be used as a numerical feature directly.
        return common_digits_count

    def _create_features_for_prediction(self, historical_data_for_features_df):
        """
        Extracts a rich set of features from the historical data for prediction.
        This DataFrame represents the 'window_size' preceding rows.
        """
        features = {}
        if historical_data_for_features_df.empty:
            return {}

        last_date = historical_data_for_features_df['date'].iloc[-1]

        # Original features
        features['sin_month'] = np.sin(2 * np.pi * last_date.month / 12)
        features['cos_month'] = np.cos(2 * np.pi * last_date.month / 12)

        last_two_digit_num = int(historical_data_for_features_df['two_digit'].iloc[-1])
        features['last_2d_num'] = last_two_digit_num
        features['last_2d_sum'] = sum(int(d) for d in historical_data_for_features_df['two_digit'].iloc[-1])
        features['last_2d_is_hot'] = 1 if last_two_digit_num in self.hot_2d_numbers else 0 # Dynamic hot check

        # --- New Features from Data Analysis ---

        # 1. Overall Digit Frequencies (0-9) in Preceding `window_size` rows (8 digits * window_size total)
        all_preceding_digits_combined = "".join(
            row['two_digit'] + row['six_digit']
            for _, row in historical_data_for_features_df.iterrows()
        )
        for digit in range(10):
            features[f'overall_digit_freq_{digit}'] = all_preceding_digits_combined.count(str(digit))

        # 2. Positional Digit Frequencies (0-9) in Preceding `window_size` rows (for each of 8 positions)
        for pos_idx in range(8):
            digits_at_this_pos = "".join(
                (row['two_digit'] + row['six_digit'])[pos_idx]
                for _, row in historical_data_for_features_df.iterrows()
                if pos_idx < len(row['two_digit'] + row['six_digit'])
            )
            for digit in range(10):
                features[f'pos{pos_idx}_digit_freq_{digit}'] = digits_at_this_pos.count(str(digit))

        # 3. Arrangement Patterns from the LAST 6-digit draw
        last_six_digit_str = historical_data_for_features_df['six_digit'].iloc[-1]
        arrangement_features = self._calculate_arrangement_patterns(last_six_digit_str)
        features.update({f'arrangement_{k}': v for k, v in arrangement_features.items()})

        # 4. Retention Pattern from the LAST draw to the SECOND LAST draw
        if len(historical_data_for_features_df) >= 2:
            last_draw_combined = historical_data_for_features_df['two_digit'].iloc[-1] + historical_data_for_features_df['six_digit'].iloc[-1]
            second_last_draw_combined = historical_data_for_features_df['two_digit'].iloc[-2] + historical_data_for_features_df['six_digit'].iloc[-2]
            features['retention_count_last_2_draws'] = self._calculate_retention_patterns(last_draw_combined, second_last_draw_combined)
        else:
            features['retention_count_last_2_draws'] = 0 # No sufficient history

        # 5. Cyclical/Yearly Anomalies (e.g., deviation from 24 draws/year)
        # Calculate actual draws in the last 12 months (or last full year in data)
        # This requires more context than just `historical_data_for_features_df` if we want yearly counts.
        # For simplicity for now, let's just use the year of the last draw for cyclical features if needed.
        # If the full self.df is available, we can count for whole years.

        # To make yearly anomaly calculation, we need access to the full df, or pass yearly counts in context
        # For now, let's skip complex yearly anomaly in features, stick to simpler monthly cycles.

        return list(features.values())

    def _update_dynamic_number_lists(self, historical_df):
        """
        Dynamically updates hot and excluded 2-digit number lists based on recent historical data.
        """
        # Define the window for calculating hot/excluded numbers (e.g., last 100 draws)
        recent_2d_window_size = min(150, len(historical_df))
        recent_2d_data = historical_df['two_digit'].tail(recent_2d_window_size)

        all_2d_numbers = [f"{i:02d}" for i in range(100)] # All possible 00-99

        # Calculate frequency of 2-digit numbers
        two_digit_counts = Counter(recent_2d_data)

        # Hot Numbers: Top N most frequent in the recent window
        # Consider any number that appears more than X times or is in the top Y%
        hot_threshold = 2 # Appear at least 2 times in recent window
        self.hot_2d_numbers = {int(num) for num, count in two_digit_counts.items() if count >= hot_threshold}

        # Excluded Numbers: Numbers that haven't appeared in a long time (e.g., last 50 draws)
        # Or, very rare historically.
        self.excluded_2d_numbers = set()
        long_term_window_size = min(200, len(historical_df))
        long_term_2d_data = historical_df['two_digit'].tail(long_term_window_size)
        long_term_2d_counts = Counter(long_term_2d_data)

        # Exclude numbers that haven't appeared in the 'long_term_window'
        for num_str in all_2d_numbers:
            if int(num_str) not in self.hot_2d_numbers and long_term_2d_counts[num_str] == 0:
                self.excluded_2d_numbers.add(int(num_str))

        # Update banned 6-digit sequences (simple example: sequences from last N draws)
        # This is a very basic heuristic; complex sequence banning requires more thought.
        self.banned_sequences_6d = set()
        last_6d_window_size = min(10, len(historical_df)) # Look at last 10 6-digit draws
        for _, row in historical_df.tail(last_6d_window_size).iterrows():
            six_digit_str = row['six_digit']
            # Simple banned: repeat digits in sequence
            if len(set(six_digit_str)) < len(six_digit_str): # If there are any repeated digits
                 # Add some simple sequences like '11', '22' if they appear
                 for i in range(len(six_digit_str) - 1):
                     if six_digit_str[i] == six_digit_str[i+1]:
                         self.banned_sequences_6d.add(six_digit_str[i:i+2])
            # Adding some general highly undesirable sequences
            self.banned_sequences_6d.update({"123", "987", "789", "012", "111", "000"})


    def train(self, historical_df):
        """
        Trains GradientBoostingClassifier models for 6-digit and 2-digit numbers.
        Also updates dynamic number lists.
        """
        # Update dynamic lists BEFORE training, as features depend on them
        self._update_dynamic_number_lists(historical_df)

        # Train 6D models
        X_6d, Y_6d = [], [[] for _ in range(6)]
        # Ensure enough data for feature extraction (window_size + minimum for first feature)
        for i in range(self.window_size, len(historical_df)):
            past_df_for_features = historical_df.iloc[i - self.window_size : i] # Get window_size rows before target

            features = self._create_features_for_prediction(past_df_for_features)
            # Ensure features are not empty (e.g., if historical_df was too small for window_size)
            if not features:
                continue

            X_6d.append(features)
            target_6d = historical_df['six_digit'].iloc[i]
            for pos in range(6):
                Y_6d[pos].append(int(target_6d[pos]))

        if not X_6d:
            print("❗ ไม่สามารถฝึกโมเดล 6 หลักได้: ไม่มีข้อมูลคุณลักษณะ")
            return

        # Scale features
        X_6d_scaled = self.scaler_6d.fit_transform(X_6d)

        for pos in range(6):
            model = GradientBoostingClassifier(n_estimators=100, max_depth=5, learning_rate=0.05, random_state=42)
            model.fit(X_6d_scaled, Y_6d[pos])
            self.models_6d[pos] = model

        # Train 2D models
        X_2d, Y_2d = [], [[] for _ in range(2)]
        for i in range(self.window_size, len(historical_df)):
            past_df_for_features = historical_df.iloc[i - self.window_size : i]
            features = self._create_features_for_prediction(past_df_for_features)
            if not features:
                continue

            X_2d.append(features)
            target_2d = historical_df['two_digit'].iloc[i]
            for pos in range(2):
                Y_2d[pos].append(int(target_2d[pos]))

        if not X_2d:
            print("❗ ไม่สามารถฝึกโมเดล 2 หลักได้: ไม่มีข้อมูลคุณลักษณะ")
            return

        X_2d_scaled = self.scaler_2d.fit_transform(X_2d)
        for pos in range(2):
            model = GradientBoostingClassifier(n_estimators=100, max_depth=5, learning_rate=0.05, random_state=42)
            model.fit(X_2d_scaled, Y_2d[pos])
            self.models_2d[pos] = model

    def _score_and_rank_6d(self, features_scaled, context):
        """
        Scores and ranks 6-digit lottery number candidates based on ML probabilities and pattern metrics.
        New features are implicitly used through `features_scaled`.
        """
        # Get top probable digits for each position (e.g., top 4)
        top_cands_per_pos = [model.classes_[np.argsort(model.predict_proba(features_scaled)[0])[::-1][:4]] for model in self.models_6d.values()]

        # Generate candidates from the product of top digits
        candidates_raw = ["".join(map(str, c)) for c in product(*top_cands_per_pos)]

        # If candidate pool is too small, expand by including more digits
        if len(candidates_raw) < self.CANDIDATE_POOL_SIZE_6D:
            top_cands_per_pos = [model.classes_[np.argsort(model.predict_proba(features_scaled)[0])[::-1][:5]] for model in self.models_6d.values()] # Top 5
            candidates_raw = ["".join(map(str, c)) for c in product(*top_cands_per_pos)]

        # Sample a large candidate pool if raw candidates are too many
        candidates_to_score = candidates_raw
        if len(candidates_raw) > self.CANDIDATE_POOL_SIZE_6D:
            candidates_to_score = np.random.choice(candidates_raw, self.CANDIDATE_POOL_SIZE_6D, replace=False)

        scored_candidates = []
        recent_digit_pool = set(digit for s in context['recent_draws_6d'] for digit in s) # Digits from last 5 6D draws
        last_2_draws_pool = set(digit for s in context['last_2_draws_6d'] for digit in s) # Digits from last 2 6D draws

        for cand_str in candidates_to_score:
            # Exclude if starts with '0' (for 6-digit R1 usually) or contains banned sequences
            if cand_str.startswith('0') or any(seq in cand_str for seq in self.banned_sequences_6d):
                continue

            # ML Probability Score (log probability for stability)
            log_prob_sum = 0
            for pos, digit in enumerate(cand_str):
                try:
                    # Get probability for the specific digit in the candidate
                    digit_prob = self.models_6d[pos].predict_proba(features_scaled)[0][np.where(self.models_6d[pos].classes_ == int(digit))[0][0]]
                    log_prob_sum += np.log(digit_prob + 1e-9) # Add small epsilon to avoid log(0)
                except IndexError: # Digit not in model's known classes for this position (unlikely)
                    log_prob_sum += np.log(1e-9) # Penalize heavily

            # Pattern Match Score (based on digits in recent history)
            # This incorporates the spirit of 'Flow Pattern'
            matches_recent_pool = sum(1 for d in set(cand_str) if d in recent_digit_pool)
            pattern_score = 50 * (matches_recent_pool / 6) if matches_recent_pool >= 3 else 0 # Adjust threshold

            # Dynamic Score (based on very recent draws - hotness)
            dynamic_score = sum(15 for d in set(cand_str) if d in last_2_draws_pool)

            # Uniqueness Score (for diversity within the number itself)
            unique_score = len(set(cand_str)) * 10 # More unique digits, higher score

            # Arrangement Pattern Score (new)
            arr_patterns = self._calculate_arrangement_patterns(cand_str)
            arrangement_score = 0
            if arr_patterns['one_repeat']: arrangement_score += 15 # Reward one-repeat as it's very common
            if arr_patterns['high_repeat']: arrangement_score += 20 # Reward high-repeat as it's common
            if arr_patterns['balanced_parity']: arrangement_score += 10 # Reward balanced parity

            # Retention Score (new - how many digits match the last draw's digits)
            current_combined_cand = "".join(cand_str) # 6D
            last_historical_draw_combined = context['last_historical_combined_draw'] # This should be 8D

            # Take only the 6-digit part from the last historical draw for comparison
            last_historical_6d_str = last_historical_draw_combined[2:] # Assuming last_historical_combined_draw is '2d6d'

            retention_count = self._calculate_retention_patterns(current_combined_cand, last_historical_6d_str) # Compare 6D to 6D
            retention_score = retention_count * 10 # 10 points per retained digit (heuristic)


            # Final Score Calculation - Adjusted weights and new features
            final_score = (log_prob_sum * self.WEIGHTS_6D['ml_prob'] +
                           pattern_score * self.WEIGHTS_6D['pattern_match'] +
                           dynamic_score * self.WEIGHTS_6D['dynamic'] +
                           unique_score * self.WEIGHTS_6D['uniqueness'] +
                           arrangement_score + # Added arrangement score
                           retention_score) # Added retention score

            # Adjust score so it's positive and scaled (arbitrary offset)
            final_score = max(0, final_score + 100) # Ensure score is non-negative for display

            scored_candidates.append({'number': cand_str, 'score': final_score})

        # Ensure we return TOP_N_PREDICTIONS even if the list is smaller
        return sorted(scored_candidates, key=lambda x: x['score'], reverse=True)[:self.TOP_N_PREDICTIONS]

    def _score_and_rank_2d(self, features_scaled):
        """
        Scores and ranks 2-digit lottery number candidates based on ML probabilities and hot/cold status.
        New features are implicitly used through `features_scaled`.
        """
        scored_candidates = []
        for i in range(100):
            cand_str = f"{i:02d}"
            cand_num = int(cand_str)

            if cand_num in self.excluded_2d_numbers: # Use dynamically excluded list
                continue

            # ML Probability Score
            try:
                prob_d1 = self.models_2d[0].predict_proba(features_scaled)[0][np.where(self.models_2d[0].classes_ == int(cand_str[0]))[0][0]]
                prob_d2 = self.models_2d[1].predict_proba(features_scaled)[0][np.where(self.models_2d[1].classes_ == int(cand_str[1]))[0][0]]
                ml_score = prob_d1 * prob_d2
            except IndexError: # Should not happen if models are well trained on 0-9
                ml_score = 1e-9 # Very low score

            # Hot/Cold Score (uses dynamic hot list)
            hot_cold_score = 1.0 if cand_num in self.hot_2d_numbers else 0.5 # Higher for hot numbers

            final_score = (ml_score * self.WEIGHTS_2D['ml_prob']) + (hot_cold_score * self.WEIGHTS_2D['hot_cold'])
            scored_candidates.append({'number': cand_str, 'score': final_score})

        return sorted(scored_candidates, key=lambda x: x['score'], reverse=True)[:self.TOP_N_PREDICTIONS]

    def run_backtest(self, num_backtest_draws=50):
        """
        Runs a backtest to evaluate model performance over a specified number of past draws.
        """
        print("\n" + "="*80); print(f"V6.4 Backtest: กำลังทดสอบย้อนหลัง {num_backtest_draws} งวดล่าสุด..."); print("="*80)
        results = []
        # Start training from a point that allows enough history for feature extraction and initial training
        start_index = len(self.df) - num_backtest_draws
        if start_index < self.window_size * 2: # Ensure enough data for initial training and then for features
            print(f"❗ ข้อมูลไม่เพียงพอสำหรับ Backtest (ต้องมีอย่างน้อย {self.window_size * 2} งวด)")
            return

        # Iterate from the calculated start index up to (but not including) the last row
        # The last row of self.df is the *actual* latest draw, which we want to predict FOR
        # So we backtest up to the second to last row, and the final run predicts the very last.
        for i in tqdm(range(start_index, len(self.df)), desc="Backtesting"):
            # Ensure `train_df` has at least `self.window_size` rows for feature extraction
            # and enough rows for initial model training (e.g., first 10-20 draws).
            # The `train` method itself will check `len(historical_df)`.
            train_df = self.df.iloc[:i]

            # Re-train models with data available *up to this point* in the backtest
            if len(train_df) < self.window_size + 10: # Ensure sufficient data to train (arbitrary 10 for initial runs)
                continue # Skip if not enough historical data for robust training

            self.train(train_df) # Train models and update dynamic lists

            # Get the actual outcome for this iteration's target draw
            actual_row = self.df.iloc[i]

            # Features for prediction of `actual_row` would be based on data *before* `actual_row`
            features_for_pred_df = self.df.iloc[i - self.window_size : i]

            if features_for_pred_df.empty: # Should not happen if start_index is calculated correctly
                continue

            features = self._create_features_for_prediction(features_for_pred_df)
            if not features: # Ensure feature creation was successful
                continue

            features_scaled_6d = self.scaler_6d.transform([features])
            features_scaled_2d = self.scaler_2d.transform([features])

            # Context for scoring - based on `features_for_pred_df` (the history)
            context = {
                'recent_draws_6d': [r['six_digit'] for r in features_for_pred_df.tail(5).to_dict('records')],
                'last_2_draws_6d': [r['six_digit'] for r in features_for_pred_df.tail(2).to_dict('records')],
                'last_historical_combined_draw': features_for_pred_df['two_digit'].iloc[-1] + features_for_pred_df['six_digit'].iloc[-1]
            }

            top5_6d = [p['number'] for p in self._score_and_rank_6d(features_scaled_6d, context)]
            top5_2d = [p['number'] for p in self._score_and_rank_2d(features_scaled_2d)]

            results.append({
                'actual_6d': actual_row['six_digit'],
                'pred_6d': top5_6d,
                'actual_2d': actual_row['two_digit'],
                'pred_2d': top5_2d
            })

        # Calculate overall accuracy for the backtested draws
        acc_6d_top5 = np.mean([1 if r['actual_6d'] in r['pred_6d'] else 0 for r in results])
        acc_2d_top5 = np.mean([1 if r['actual_2d'] in r['pred_2d'] else 0 for r in results])

        print("\n--- 📊 สรุปผลการทดสอบย้อนหลัง (Backtest V6.4) ---")
        print(f"• เลข 6 หลัก (รางวัลที่ 1) - ความแม่นยำ (ผลจริงอยู่ใน Top 5): {acc_6d_top5:.1%}")
        print(f"• เลข 2 ตัวล่าง - ความแม่นยำ (ผลจริงอยู่ใน Top 5): {acc_2d_top5:.1%}")
        print("-" * 50)

    def run_final_prediction(self):
        """
        Runs the final prediction for the very next lottery draw using all available data.
        Generates 4 diverse prediction sets and applies post-processing to avoid exact repeats
        of the last historical row in the first prediction set.
        """
        print("\n" + "="*80); print("V6.4 Final Prediction: การทำนายผลสำหรับงวดถัดไป"); print("="*80)
        print("💪 กำลังฝึกโมเดลด้วยข้อมูลทั้งหมดที่มี...")
        self.train(self.df) # Train models with the full historical dataset

        # Features for the next prediction (based on the very last `window_size` rows of self.df)
        features_for_next_pred_df = self.df.tail(self.window_size)

        if features_for_next_pred_df.empty or len(features_for_next_pred_df) < self.window_size:
            print(f"❗ ไม่มีข้อมูลประวัติ {self.window_size} งวดล่าสุดเพียงพอสำหรับการทำนาย")
            return [] # Return empty list

        features = self._create_features_for_prediction(features_for_next_pred_df)
        if not features:
            print("❗ ไม่สามารถสร้างคุณลักษณะสำหรับการทำนายได้")
            return []

        features_scaled_6d = self.scaler_6d.transform([features])
        features_scaled_2d = self.scaler_2d.transform([features])

        # Context for scoring the final prediction
        context_for_pred = {
            'recent_draws_6d': [r['six_digit'] for r in features_for_next_pred_df.tail(5).to_dict('records')],
            'last_2_draws_6d': [r['six_digit'] for r in features_for_next_pred_df.tail(2).to_dict('records')],
            'last_historical_combined_draw': features_for_next_pred_df['two_digit'].iloc[-1] + features_for_next_pred_df['six_digit'].iloc[-1]
        }

        # --- Generate Prediction Sets (4 sets as requested) ---
        all_predicted_sets = []
        generated_sets_strings = set() # To ensure uniqueness among generated sets

        # Set 1: Most Probable (with non-recurrence post-processing)
        top5_6d_raw = self._score_and_rank_6d(features_scaled_6d, context_for_pred)
        top5_2d_raw = self._score_and_rank_2d(features_scaled_2d)

        predicted_6d_set1 = top5_6d_raw[0]['number'] if top5_6d_raw else '??????'
        predicted_2d_set1 = top5_2d_raw[0]['number'] if top5_2d_raw else '??'

        combined_pred_set1_str = predicted_2d_set1 + predicted_6d_set1
        last_historical_combined_str = context_for_pred['last_historical_combined_draw']

        # Post-processing for Set 1: Avoid exact repeat of the last historical draw
        if combined_pred_set1_str == last_historical_combined_str:
            print("\n🚨 การทำนายชุดที่ 1 ตรงกับงวดล่าสุดในประวัติ! กำลังปรับเปลี่ยนเพื่อหลีกเลี่ยงการซ้ำ...")
            modified_digits = list(combined_pred_set1_str)

            # Simple strategy: Increment 2-3 specific digits to ensure difference
            # Choose digits from less predictable positions (e.g., those with lower individual accuracy during backtest)
            # For now, let's pick fixed positions for simplicity and guarantee difference
            positions_to_modify = [0, 2, 5] # Example: L2-tens, R1-hundred-thousands, R1-hundreds

            for pos in positions_to_modify:
                if pos < len(modified_digits):
                    current_val = int(modified_digits[pos])
                    modified_digits[pos] = str((current_val + 1) % 10) # Increment by 1

            combined_pred_set1_str = "".join(modified_digits)
            predicted_2d_set1 = combined_pred_set1_str[0:2]
            predicted_6d_set1 = combined_pred_set1_str[2:8]

        all_predicted_sets.append({
            'รางวัลที่ 1': predicted_6d_set1,
            'เลข 2 ตัวล่าง': predicted_2d_set1
        })
        generated_sets_strings.add(predicted_2d_set1 + predicted_6d_set1)

        # Generate additional diverse prediction sets
        # Strategy: Mix top predictions from ML and introduce slight variations

        # Set 2: Combine top 6D, and 2nd 2D (or vice versa)
        if len(top5_6d_raw) > 0 and len(top5_2d_raw) > 1:
            set2_6d = top5_6d_raw[0]['number']
            set2_2d = top5_2d_raw[1]['number']
            new_combined_str = set2_2d + set2_6d
            if new_combined_str not in generated_sets_strings:
                all_predicted_sets.append({'รางวัลที่ 1': set2_6d, 'เลข 2 ตัวล่าง': set2_2d})
                generated_sets_strings.add(new_combined_str)

        # Set 3: Use 2nd top 6D, and top 2D (or introduce a small offset)
        if len(top5_6d_raw) > 1 and len(top5_2d_raw) > 0:
            set3_6d = top5_6d_raw[1]['number']
            set3_2d = top5_2d_raw[0]['number']
            new_combined_str = set3_2d + set3_6d
            if new_combined_str not in generated_sets_strings:
                all_predicted_sets.append({'รางวัลที่ 1': set3_6d, 'เลข 2 ตัวล่าง': set3_2d})
                generated_sets_strings.add(new_combined_str)

        # Set 4: Combine 2nd top 6D and 2nd top 2D, or introduce random changes
        if len(top5_6d_raw) > 1 and len(top5_2d_raw) > 1:
            set4_6d = top5_6d_raw[1]['number']
            set4_2d = top5_2d_raw[1]['number']
            new_combined_str = set4_2d + set4_6d
            if new_combined_str not in generated_sets_strings:
                all_predicted_sets.append({'รางวัลที่ 1': set4_6d, 'เลข 2 ตัวล่าง': set4_2d})
                generated_sets_strings.add(new_combined_str)

        # Ensure we always have 4 sets, even if the above strategies don't yield 4 unique ones
        while len(all_predicted_sets) < 4:
            # Take the last generated set, and randomly change a few digits
            last_generated_6d = all_predicted_sets[-1]['รางวัลที่ 1']
            last_generated_2d = all_predicted_sets[-1]['เลข 2 ตัวล่าง']
            modified_digits = list(last_generated_2d + last_generated_6d)

            # Change 2-3 random positions
            num_changes = np.random.randint(2, 4)
            positions_to_change = np.random.choice(8, num_changes, replace=False)

            for pos in positions_to_change:
                current_val = int(modified_digits[pos])
                modified_digits[pos] = str((current_val + np.random.randint(1, 5)) % 10) # Random increment 1-4

            new_combined_str = "".join(modified_digits)
            if new_combined_str not in generated_sets_strings:
                all_predicted_sets.append({
                    'รางวัลที่ 1': new_combined_str[2:],
                    'เลข 2 ตัวล่าง': new_combined_str[0:2]
                })
                generated_sets_strings.add(new_combined_str)

            # Safety break to prevent infinite loop if somehow always generates non-unique despite changes
            if len(generated_sets_strings) >= self.CANDIDATE_POOL_SIZE_6D: # Fallback to prevent infinite loop
                break


        print("\n--- 🏆 ผลการทำนาย 4 ชุดที่ดีที่สุด (เลข 6 หลัก และ เลข 2 ตัวล่าง) ---")
        for i, prediction_set in enumerate(all_predicted_sets):
            print(f"  ชุดที่ {i+1}: รางวัลที่ 1: {prediction_set['รางวัลที่ 1']} | เลข 2 ตัวล่าง: {prediction_set['เลข 2 ตัวล่าง']}")

        print("\n💡 หลักการคาดการณ์ (อิงตาม ML Models):")
        print("  - โมเดล Machine Learning (GradientBoostingClassifier) ได้รับการปรับปรุงด้วยคุณลักษณะที่ละเอียดขึ้น")
        print("    เช่น ความถี่ของตัวเลขตามตำแหน่ง, รูปแบบการจัดเรียง และการคงอยู่ของตัวเลข")
        print("  - มีการอัปเดตกลุ่มตัวเลข 'ร้อน' และ 'ไม่น่าจะออก' แบบอัตโนมัติจากข้อมูลประวัติ")
        print("  - ชุดการทำนายชุดแรกคือตัวเลขที่โมเดลคาดการณ์ว่ามีความน่าจะเป็นสูงสุด")
        print("    **และได้ถูกปรับเปลี่ยนหากซ้ำกับผลรางวัลของงวดล่าสุดในประวัติ (เพื่อหลีกเลี่ยงการออกเลขเดิมทั้งชุด)**")
        print("  - ชุดอื่นๆ (ชุดที่ 2, 3, 4) เป็นการนำตัวเลขที่มีความน่าจะเป็นรองลงมาหรือการปรับเปลี่ยนเล็กน้อยมาผสมผสาน")
        print("    โดยพยายามเพิ่มความหลากหลายและทางเลือกในการพิจารณาให้คุณได้มากที่สุด")
        print("\nหมายเหตุ: การคาดการณ์นี้อิงตามสถิติที่เรียนรู้จากข้อมูลในอดีตเท่านั้น ไม่ได้การันตีผล 100%")

        return all_predicted_sets


# Main Execution Block
def main():
    """Run the interactive workflow: load data, backtest, then predict the next draw."""
    banner = "=" * 80
    print(banner)
    print("ยินดีต้อนรับสู่ The Lotto Pattern Engine v6.4")
    print(banner)

    engine = LottoPatternEngineV6(window_size=7)

    # Backtest and prediction only make sense once data has been loaded.
    data_loaded = engine.load_and_prepare_data()
    if data_loaded:
        engine.run_backtest(num_backtest_draws=50)
        engine.run_final_prediction()

    print("\n😊 ขอบคุณที่ใช้งานโปรแกรมวิเคราะห์ครับ!")


if __name__ == "__main__":
    main()


ยินดีต้อนรับสู่ The Lotto Pattern Engine v6.4
🚀 กำลังเริ่มต้น Lotto Pattern Engine v6.4 (Enhanced)...
📊 กรุณาคลิกปุ่มด้านล่างเพื่อเลือกไฟล์ CSV สำหรับวิเคราะห์


Saving ตารางเปล่า 21.csv to ตารางเปล่า 21.csv
กำลังโหลดและทำความสะอาดข้อมูลจากไฟล์: ตารางเปล่า 21.csv
✅ โหลดและอัปเดตข้อมูลสำเร็จ! จำนวนข้อมูลทั้งหมด: 560 แถว

V6.4 Backtest: กำลังทดสอบย้อนหลัง 50 งวดล่าสุด...


Backtesting:   0%|          | 0/50 [00:00<?, ?it/s]