# 📊 Journal Entry ID Creator

**Automatically create balanced journal entries from your Excel data in 3 simple steps!**

This tool will:
- ✅ Create balanced journal entries (debits = credits)
- ✅ Group lines by date and matching fields
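- ✅ Assign a Journal ID to every valid line (a leftover line that carries both a debit and a credit amount is reported as invalid and left blank)
- ✅ Enforce the two-line minimum for balanced entries (lines that fit no balanced group become single-line entries)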
- ✅ Download results instantly

## 🚀 Instructions:
1. **Run the setup** (click ▶️ on the cell below)
2. **Upload your Excel file** when prompted
3. **Download your results** with Journal IDs added

---


In [None]:
# @title 🔧 **STEP 1: Setup & Configuration** { display-mode: "form" }
# @markdown Click the ▶️ button to install required packages and load the journal entry creator.

# Announce the setup step, underlined with a 50-character rule.
print("🔧 Setting up Journal Entry ID Creator...", "=" * 50, sep="\n")

# Install required packages
import subprocess
import sys

def install_package(package):
    """Quietly pip-install *package* with the interpreter running this notebook.

    Raises:
        subprocess.CalledProcessError: if pip exits with a non-zero status.
    """
    pip_command = [sys.executable, "-m", "pip", "install", package, "-q"]
    subprocess.check_call(pip_command)

# Install the dependencies one after another; the first failure is reported
# and the remaining packages are skipped, but the cell keeps running.
try:
    for required_package in ("pandas", "openpyxl"):
        install_package(required_package)
    print("✅ Required packages installed successfully!")
except Exception as install_error:
    print(f"❌ Error installing packages: {install_error}")

# Import the libraries used by the cells of this notebook.
# A failure is printed but not re-raised, so execution continues; any name
# that was not imported before the failure stays undefined for later code.
try:
    import pandas as pd
    import numpy as np
    from itertools import combinations
    import io
    from pathlib import Path
    # Provides files.upload() / files.download() used by STEP 2 and STEP 3.
    # NOTE(review): presumably only importable inside Google Colab — confirm.
    from google.colab import files
    import warnings
    # Suppress all warnings from this point on.
    warnings.filterwarnings('ignore')
    print("✅ Libraries imported successfully!")
except Exception as e:
    print(f"❌ Error importing libraries: {e}")

# Load the Journal Entry Creator (implementation hidden)
exec('''
class JournalEntryCreator:
    """Group journal lines into journal entries and tag each line with a Journal ID.

    Lines are loaded from an Excel workbook and grouped by posted date plus
    combinations of the optional columns. Every group of two or more lines
    whose debits equal its credits becomes one journal entry. Lines that fit
    no balanced group become single-line entries, except that a zero-amount
    line is attached to an existing entry with the same date when there is
    one, and a line carrying both a debit and a credit is left unassigned.
    """

    REQUIRED_COLUMNS = ['Posted Date', 'Account ID', 'Debit Amount', 'Credit Amount']
    # Debits and credits count as equal when they differ by less than this.
    BALANCE_TOLERANCE = 0.01

    def __init__(self):
        # DataFrame of loaded lines; None until a load succeeds.
        self.journal_lines = None
        # Maps a Journal ID to a dict with keys lines, grouping_fields,
        # group_key, total_debits and total_credits.
        self.grouped_entries = {}
        # Lines without a Journal ID (a DataFrame once entries were created).
        self.unassigned_lines = []

    @staticmethod
    def _is_header_row(row):
        """Return True when the first cell of *row* is text containing the words Posted Date."""
        return isinstance(row[0], str) and 'Posted Date' in row[0]

    @staticmethod
    def _date_key(value):
        """Return the calendar date of *value*, or None for a missing date."""
        return None if pd.isna(value) else value.date()

    @staticmethod
    def _format_date(value):
        """Format *value* as YYYY-MM-DD, tolerating a missing date."""
        return 'No date' if pd.isna(value) else value.strftime('%Y-%m-%d')

    def _normalize_and_deduplicate_columns(self, columns):
        """Return cleaned, guaranteed-unique column names for *columns*.

        Blank, NaN and None headers become Unnamed. A repeated name gets a
        numeric suffix (name_1, name_2, ...); a suffix is skipped when the
        resulting name is already used or appears literally among the headers,
        so the returned list never contains duplicates.
        """
        normalized = []
        for col in columns:
            name = '' if pd.isna(col) else str(col).strip()
            if name.lower() in ('', 'nan', 'none'):
                name = 'Unnamed'
            normalized.append(name)

        reserved = set(normalized)  # names written in the sheet keep priority
        used = set()
        last_suffix = {}
        unique_cols = []
        for name in normalized:
            candidate = name
            if candidate in used:
                suffix = last_suffix.get(name, 0)
                while True:
                    suffix += 1
                    candidate = f"{name}_{suffix}"
                    if candidate not in used and candidate not in reserved:
                        break
                last_suffix[name] = suffix
            used.add(candidate)
            unique_cols.append(candidate)
        return unique_cols

    def load_data_from_uploaded_file(self, uploaded_file_data, filename):
        """Load journal lines from the raw bytes of an uploaded Excel file.

        The header is taken from the second row, else the first row, when that
        row starts with a cell containing Posted Date; otherwise the first four
        columns are assumed to be the required ones in their standard order.

        Args:
            uploaded_file_data: Bytes of the workbook.
            filename: Original file name; only used in the status message.

        Returns:
            True when the lines were stored in self.journal_lines, False when
            the file is empty, lacks a required column or cannot be parsed
            (the reason is printed).
        """
        try:
            df_all = pd.read_excel(io.BytesIO(uploaded_file_data), header=None)

            if len(df_all) == 0:
                print("No data found in Excel file.")
                return False

            first_row = df_all.iloc[0].values
            second_row = df_all.iloc[1].values if len(df_all) > 1 else None

            if second_row is not None and self._is_header_row(second_row):
                print("✅ Detected field names in second row")
                template_row = second_row
                df = df_all.iloc[2:].copy()
            elif self._is_header_row(first_row):
                print("✅ Detected template headers in first row")
                template_row = first_row
                df = df_all.iloc[1:].copy()
            else:
                print("✅ Using default column structure")
                extra_count = len(df_all.columns) - len(self.REQUIRED_COLUMNS)
                template_row = list(self.REQUIRED_COLUMNS) + [f'Optional_{i}' for i in range(extra_count)]
                df = df_all.copy()

            if len(df) == 0:
                print("❌ No data found. Please add journal line data.")
                return False

            df.columns = self._normalize_and_deduplicate_columns(template_row[:len(df.columns)])
            df = df.dropna(how='all')

            for col in self.REQUIRED_COLUMNS:
                if col not in df.columns:
                    print(f"❌ Required column '{col}' not found")
                    return False

            df['Posted Date'] = pd.to_datetime(df['Posted Date'])
            # Non-numeric or blank amounts are treated as zero.
            for amount_col in ('Debit Amount', 'Credit Amount'):
                df[amount_col] = pd.to_numeric(df[amount_col], errors='coerce').fillna(0)
            # Stable per-line key used to track which lines were assigned.
            df['_row_index'] = range(len(df))

            self.journal_lines = df
            print(f"📊 Loaded {len(df)} journal lines from {filename}")
            # Debug columns
            print("Columns:", list(df.columns))
            return True

        except Exception as e:
            print(f"❌ Error loading Excel file: {e}")
            return False

    def get_optional_fields(self):
        """Return the non-required columns that hold at least one real value.

        Empty strings and the template placeholder text do not count as
        values. A column that cannot be inspected is reported and skipped.
        """
        if self.journal_lines is None:
            return []

        skipped = list(self.REQUIRED_COLUMNS) + ['_row_index']
        optional_cols = []
        for col in self.journal_lines.columns:
            if col in skipped:
                continue
            try:
                values = self.journal_lines[col]
                filled = values.notna() & (values != '') & (values != '[INSERT FIELD NAME]')
                has_data = bool(filled.any())
            except Exception as e:
                # Best effort: an uninspectable column is simply not used for grouping.
                print(f"   ⚠️  Skipping column {col!r}: {e}")
                continue
            if has_data:
                optional_cols.append(col)
        return optional_cols

    def check_balance(self, group_df):
        """Return True when *group_df* has at least two lines and debits equal credits."""
        if len(group_df) < 2:
            return False
        difference = group_df['Debit Amount'].sum() - group_df['Credit Amount'].sum()
        return abs(difference) < self.BALANCE_TOLERANCE

    def generate_grouping_combinations(self, optional_fields, max_fields=5):
        """Return the grouping keys to try, from most specific to least specific.

        Each key is a list starting with Posted Date followed by a combination
        of at most *max_fields* optional fields; the last key is Posted Date alone.
        """
        largest = min(len(optional_fields), max_fields)
        combinations_to_try = [
            ['Posted Date'] + list(combo)
            for size in range(largest, 0, -1)
            for combo in combinations(optional_fields, size)
        ]
        combinations_to_try.append(['Posted Date'])
        return combinations_to_try

    def create_journal_entries(self, max_optional_fields=5):
        """Build journal entries from the loaded lines.

        Results are stored in self.grouped_entries and self.unassigned_lines;
        results of an earlier call are discarded first.

        Args:
            max_optional_fields: Largest number of optional fields combined
                into one grouping key.

        Returns:
            False when no data is loaded, True otherwise.
        """
        if self.journal_lines is None:
            print("❌ No data loaded")
            return False

        # Start clean: IDs restart at JE0001, so entries left over from an
        # earlier call would otherwise be overwritten or wrongly extended.
        self.grouped_entries = {}
        self.unassigned_lines = []

        print("🔄 Creating journal entries...")

        optional_fields = self.get_optional_fields()
        print(f"📋 Found optional fields: {optional_fields}")

        field_combinations = self.generate_grouping_combinations(optional_fields, max_optional_fields)
        print(f"🔍 Testing {len(field_combinations)} grouping combinations...")

        assigned_lines = set()
        # Posted date -> ID of the first entry created for that date, so a
        # zero-amount line finds its host without scanning every entry.
        first_entry_by_date = {}

        def register_entry(lines, grouping_fields, group_key):
            """Store *lines* as the next journal entry and return its ID."""
            je_id = f"JE{len(self.grouped_entries) + 1:04d}"
            lines = lines.copy().reset_index(drop=True)
            self.grouped_entries[je_id] = {
                'lines': lines,
                'grouping_fields': grouping_fields,
                'group_key': group_key,
                'total_debits': lines['Debit Amount'].sum(),
                'total_credits': lines['Credit Amount'].sum(),
            }
            first_entry_by_date.setdefault(self._date_key(lines['Posted Date'].iloc[0]), je_id)
            assigned_lines.update(lines['_row_index'].tolist())
            return je_id

        def pending_lines():
            """Return the lines that have no entry yet, with a fresh index."""
            mask = ~self.journal_lines['_row_index'].isin(assigned_lines)
            return self.journal_lines[mask].copy().reset_index(drop=True)

        for fields in field_combinations:
            unassigned_df = pending_lines()
            if len(unassigned_df) == 0:
                break

            # Keep only existing columns and drop repeats while preserving order.
            valid_fields = list(dict.fromkeys(col for col in fields if col in unassigned_df.columns))
            if not valid_fields:
                continue

            try:
                grouped = unassigned_df.groupby(valid_fields, dropna=False, sort=False)
            except Exception as e:
                print(f"   Error grouping by {valid_fields}: {e}")
                continue

            balanced_groups = 0
            for group_key, group_df in grouped:
                if self.check_balance(group_df):
                    register_entry(group_df, valid_fields, group_key)
                    balanced_groups += 1

            if balanced_groups > 0:
                print(f"   ✅ Created {balanced_groups} entries with grouping: {valid_fields}")

        # Handle remaining lines
        remaining_lines = pending_lines()

        if len(remaining_lines) > 0:
            print(f"📝 Processing {len(remaining_lines)} remaining lines...")

            for _, line in remaining_lines.iterrows():
                debit = line['Debit Amount']
                credit = line['Credit Amount']

                if debit != 0 and credit != 0:
                    # A lone line with both sides filled is left without an ID
                    # and reported as invalid in the summary.
                    continue

                line_df = pd.DataFrame([line]).reset_index(drop=True)

                if debit == 0 and credit == 0:
                    host_id = first_entry_by_date.get(self._date_key(line['Posted Date']))
                    if host_id is not None:
                        # Totals stay as they are: the line adds zero to both sides.
                        host = self.grouped_entries[host_id]
                        host['lines'] = pd.concat([host['lines'], line_df], ignore_index=True)
                        assigned_lines.add(line['_row_index'])
                        print(f"   ✅ Zero-amount line assigned to {host_id}")
                    else:
                        register_entry(line_df, ['Zero Amount Entry'], f"Zero amount: {line['Account ID']}")
                else:
                    register_entry(line_df, ['Individual Entry'], f"Single line: {line['Account ID']}")

        self.unassigned_lines = self.journal_lines[~self.journal_lines['_row_index'].isin(assigned_lines)].copy()

        print(f"\\n📊 Summary:")
        print(f"   ✅ Journal entries created: {len(self.grouped_entries)}")
        print(f"   ✅ Lines assigned: {len(assigned_lines)}")
        print(f"   ⚠️  Invalid lines: {len(self.unassigned_lines)}")

        return True

    def generate_output(self, original_filename):
        """Return the result workbook and its file name.

        The workbook holds the loaded lines with a Journal ID column inserted
        directly before Posted Date; lines without an entry get an empty ID.

        Args:
            original_filename: Name of the uploaded file; its stem is reused.

        Returns:
            A tuple (workbook bytes, output file name), or None when no data
            is loaded.
        """
        if self.journal_lines is None:
            return None

        id_by_row = {}
        for je_id, entry_data in self.grouped_entries.items():
            for row_index in entry_data['lines']['_row_index'].tolist():
                id_by_row[row_index] = je_id

        journal_ids = self.journal_lines['_row_index'].map(lambda row_index: id_by_row.get(row_index, ''))

        # An incoming Journal ID column is replaced by the generated one.
        output_df = self.journal_lines.drop(columns=['_row_index', 'Journal ID'], errors='ignore')
        posted_date_idx = list(output_df.columns).index('Posted Date')
        output_df.insert(posted_date_idx, 'Journal ID', journal_ids)

        # The buffer below is always written in xlsx format, so the name must
        # say .xlsx even when the upload was .xls or .xlsm.
        output_filename = f"{Path(original_filename).stem}_with_journal_ids.xlsx"

        output_buffer = io.BytesIO()
        output_df.to_excel(output_buffer, index=False)

        self.print_summary_report()

        return output_buffer.getvalue(), output_filename

    def print_summary_report(self):
        """Print a short overview of the multi-line and single-line entries."""
        print("\\n" + "="*50)
        print("📋 JOURNAL ENTRY SUMMARY")
        print("="*50)

        multi_line_entries = []
        single_line_entries = []

        # Insertion order is creation order, i.e. ascending ID; sorting the
        # ID strings would misplace IDs once they outgrow four digits.
        for je_id, entry_data in self.grouped_entries.items():
            if len(entry_data['lines']) > 1:
                multi_line_entries.append((je_id, entry_data))
            else:
                single_line_entries.append((je_id, entry_data))

        if multi_line_entries:
            print(f"\\n🔗 MULTI-LINE ENTRIES ({len(multi_line_entries)}):")
            for je_id, entry_data in multi_line_entries[:5]:
                lines = entry_data['lines']
                print(f"   {je_id}: {self._format_date(lines['Posted Date'].iloc[0])} | {len(lines)} lines | ${entry_data['total_debits']:,.2f}")

            if len(multi_line_entries) > 5:
                print(f"   ... and {len(multi_line_entries) - 5} more")

        if single_line_entries:
            print(f"\\n📄 SINGLE-LINE ENTRIES ({len(single_line_entries)}):")
            for je_id, entry_data in single_line_entries[:3]:
                line = entry_data['lines'].iloc[0]
                print(f"   {je_id}: {self._format_date(line['Posted Date'])} | {line['Account ID']} | ${line['Debit Amount']:.2f}/${line['Credit Amount']:.2f}")

            if len(single_line_entries) > 3:
                print(f"   ... and {len(single_line_entries) - 3} more")
''')

# Confirm the class is available and point the user to the upload cell.
print(
    "✅ Journal Entry Creator loaded successfully!",
    "\n🎯 Ready to process your Excel file!",
    "   ⬇️ Run the next cell to upload your file",
    sep="\n",
)


In [None]:
# @title 📁 **STEP 2: Upload Your Excel File** { display-mode: "form" }
# @markdown Click ▶️ to upload your Excel file with journal line data.

# Explain the expected layout, take one uploaded workbook, and process it.
# On success the results are left in the globals output_data / output_filename.
print("📁 Upload Your Excel File")
print("=" * 30)
print("Your file should contain:")
print("   ✅ Posted Date column")
print("   ✅ Account ID column")
print("   ✅ Debit Amount column")
print("   ✅ Credit Amount column")
print("   📋 Optional fields (Description, Reference, etc.)")
print()

# Forget the results of an earlier run: if this run fails, a later cell that
# checks for these names must not find the previous file's output.
for stale_name in ('output_data', 'output_filename'):
    globals().pop(stale_name, None)

# Upload file
uploaded = files.upload()

if uploaded:
    filename = next(iter(uploaded))
    if len(uploaded) > 1:
        print(f"⚠️  {len(uploaded)} files uploaded; only {filename} will be processed.")
    file_data = uploaded[filename]
    print(f"\n✅ Successfully uploaded: {filename}")
    print(f"   📊 File size: {len(file_data):,} bytes")

    # Process immediately
    print("\n🚀 Processing your data...")
    print("=" * 40)

    creator = JournalEntryCreator()

    if not creator.load_data_from_uploaded_file(file_data, filename):
        print("❌ Failed to load your Excel file")
    elif not creator.create_journal_entries():
        print("❌ Failed to create journal entries")
    else:
        # generate_output returns None when there is nothing to export.
        result = creator.generate_output(filename)
        if result is None:
            print("❌ Failed to generate the output file")
        else:
            output_data, output_filename = result

            print(f"\n🎉 SUCCESS! Your file is ready for download.")
            print(f"   📄 Output filename: {output_filename}")
            print("\n⬇️ Run the next cell to download your results!")
else:
    print("❌ No file uploaded. Please run this cell again.")


In [None]:
# @title 💾 **STEP 3: Download Your Results** { display-mode: "form" }
# @markdown Click ▶️ to download your processed Excel file with Journal IDs.

# Download the processed workbook if the upload cell produced one.
if 'output_data' not in locals() or 'output_filename' not in locals():
    print(
        "❌ No processed data available.",
        "   Please run the previous cell to upload and process your file first.",
        sep="\n",
    )
else:
    print("💾 Downloading Your Results", "=" * 30, sep="\n")

    # Write the bytes to disk first; the download helper takes a file name.
    with open(output_filename, 'wb') as result_file:
        result_file.write(output_data)

    files.download(output_filename)

    print(
        f"✅ Download started: {output_filename}",
        "\n📋 Your output file contains:",
        "   ✅ All your original data",
        "   ✅ New 'Journal ID' column",
        "   ✅ Every line has a Journal ID",
        "   ✅ Balanced journal entries",
        "   ✅ Zero-amount lines properly assigned",
        "\n🎯 Next Steps:",
        "   📊 Import into your accounting system",
        "   📈 Use Journal IDs for reporting",
        "   🔍 Review the summary above",
        "\n✨ Thank you for using Journal Entry ID Creator!",
        sep="\n",
    )
