<a href="https://colab.research.google.com/github/rdntmsn/csvtopdf/blob/master/Harmony360_AI_FineTuning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# prompt: set global variables for this project

# Project-wide placeholder settings. The trailing `#@param` annotations are
# Colab form markers, so each value can be edited from the cell's form view.
# NOTE(review): none of these names are referenced by the cells reviewed here
# (later cells use hard-coded Drive paths) — confirm they are still needed.
PROJECT_ID = "your-project-id" #@param {type:"string"}
BUCKET_NAME = "your-bucket-name" #@param {type:"string"}
REGION = "us-central1" #@param {type:"string"}
DATASET_ID = "your_dataset_id" #@param {type:"string"}
TABLE_ID = "your_table_id" #@param {type:"string"}
MODEL_NAME = "your_model_name" #@param {type:"string"}


# Replace the placeholders above with your actual values.
# You can also set these variables in the environment
# or use a secrets management service for production.

In [None]:
!pip install transformers datasets accelerate --quiet


In [None]:
!pip install -U transformers
from datasets import load_dataset, Dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, DataCollatorForLanguageModeling
import torch



In [None]:
from google.colab import drive
drive.mount('/content/drive')

# Files in your Drive are now reachable under '/content/drive/MyDrive/'.
# Read with an explicit encoding so the result does not depend on the runtime locale.
with open('/content/drive/MyDrive/Harmony360/harmony360_mock_training_dataset.jsonl', 'r', encoding='utf-8') as f:
  file_contents = f.read()

# Show only a short preview; printing the whole dataset floods the cell output.
print("\n".join(file_contents.splitlines()[:5]))


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
{"instruction": "Calculate the Time Node Jump for Gold using Harmony360 principles.", "input": "Atomic Number: 79 (Au)", "output": "\u0394T = (\u03c6\u03c0)^79 \u2248 4.38e+34"}
{"instruction": "Classify the resonance group of Carbon based on Harmony360.", "input": "Atomic Number: 6 (C)", "output": "Resonance Group: Quantum Harmonics"}
{"instruction": "Generate the 3\u20136\u20139 resonance function for Hydrogen.", "input": "Atomic Number: 1", "output": "R_369(\u03b8) = sin(3) + cos(6) + sin(9) \u2248 0.4121"}
{"instruction": "What is the \u03c0-\u03c6 harmonic alignment of Atomic Number 47?", "input": "Element: Silver (Ag)", "output": "Alignment = ((\u03c6 \u00d7 47) % \u03c0) / \u03c0 \u2248 0.63321"}
{"instruction": "Simulate a fractal harmonic energy output for Iron using Harmony360.", "input": "Element: Fe (Atomic Number 26)", "output": "FHM(x, y) = [sin

In [None]:
from datasets import load_dataset

# Load the mock instruction dataset (one JSON object per line) as a single split.
dataset = load_dataset(
    "json",
    data_files="/content/drive/MyDrive/Harmony360/harmony360_mock_training_dataset.jsonl",
    split="train",
)

# Add a "text" column holding the instruction / input / output prompt of each record.
dataset = dataset.map(
    lambda record: {
        "text": (
            f"### Instruction:\n{record['instruction']}\n\n"
            f"### Input:\n{record['input']}\n\n"
            f"### Output:\n{record['output']}"
        )
    }
)


In [None]:
# NOTE(review): the next cell reassigns model_name, tokenizer and model to "gpt2",
# so the objects loaded here are replaced before any training cell uses them.
model_name = "EleutherAI/gpt-neo-1.3B"  # You can also try distilGPT2 for lighter training
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)


In [None]:
# ✅ Load Pretrained Model & Tokenizer
from transformers import AutoTokenizer, AutoModelForCausalLM

model_name = "gpt2"  # or another base model you're fine-tuning
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# ✅ Fix padding token issue for GPT-style models: reuse the EOS token for padding
tokenizer.pad_token = tokenizer.eos_token
# Keep the embedding matrix in step with the tokenizer size.
# NOTE(review): reusing EOS adds no new token, so this is presumably a no-op — confirm.
model.resize_token_embeddings(len(tokenizer))
model.config.pad_token_id = tokenizer.eos_token_id

# ✅ Tokenizer Function
def tokenize(example):
    """Tokenize ``example["text"]`` with the notebook-level ``tokenizer``.

    Sequences are truncated and padded to a fixed length of 512 tokens.
    """
    encode_options = {"truncation": True, "padding": "max_length", "max_length": 512}
    return tokenizer(example["text"], **encode_options)

In [None]:
tokenized_dataset = dataset.map(tokenize, batched=True)

In [None]:

import torch
from transformers import TrainingArguments, Trainer, DataCollatorForLanguageModeling

# Training configuration; checkpoints are written under ./harmony360-model.
training_args = TrainingArguments(
    output_dir="./harmony360-model",
    per_device_train_batch_size=2,
    num_train_epochs=4,
    logging_steps=10,
    save_steps=100,
    fp16=torch.cuda.is_available(),  # mixed precision only when a GPU is present
    save_total_limit=2,
)

# Data collator for causal language modelling (mlm=False disables masked-LM).
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False
)

# `processing_class` replaces Trainer's deprecated `tokenizer` keyword, which
# emits a FutureWarning and is scheduled for removal in Transformers 5.
# It needs transformers >= 4.46, which the `pip install -U transformers` cell provides.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    processing_class=tokenizer,
    data_collator=data_collator
)

  trainer = Trainer(


In [None]:
trainer.train()

KeyboardInterrupt: 

In [None]:
from transformers import pipeline


In [None]:
from transformers import GPT2LMHeadModel

# Load the saved checkpoint explicitly as a GPT-2 LM-head model and wrap it,
# together with the current tokenizer, in a text-generation pipeline.
model = GPT2LMHeadModel.from_pretrained("/content/harmony360-model/checkpoint-20")
generator = pipeline("text-generation", tokenizer=tokenizer, model=model)

# Prompt in the same instruction / input / output layout used for training.
prompt = (
    "### Instruction:\nWhat is the Time Node Jump of element #90?\n\n"
    "### Input:\nAtomic Number: 90 (Th)\n\n"
    "### Output:\n"
)

# Greedy decoding (do_sample=False) of at most 50 new tokens.
output = generator(prompt, do_sample=False, max_new_tokens=50)
print(output[0]['generated_text'])

KeyboardInterrupt: 

In [None]:
# Same prompt as before but without a trailing newline after "### Output:".
prompt = "\n".join([
    "### Instruction:",
    "What is the Time Node Jump of element #90?",
    "",
    "### Input:",
    "Atomic Number: 90 (Th)",
    "",
    "### Output:",
])

# Greedy decoding of at most 50 new tokens, stopping at the tokenizer's EOS id.
output = generator(
    prompt,
    eos_token_id=tokenizer.eos_token_id,
    do_sample=False,
    max_new_tokens=50,
)
print(output[0]['generated_text'])


In [None]:
import os
import json
from pathlib import Path
from shutil import move

def sanitize_and_index_training_files(directory):
    """
    Rename the files in ``directory`` to safe ``.txt`` names, validate the
    JSONL-like ones line by line, and write a ``training_manifest.json`` index.

    Naming rule: the stem is lower-cased with spaces and hyphens turned into
    underscores; a non-``.txt`` extension is folded into the stem (``a.jsonl``
    becomes ``ajsonl.txt``). Files already ending in ``.txt`` keep that single
    extension, so running the function again does not rename them further.

    Hidden entries, sub-directories and the manifest itself are skipped.
    A file whose target name is already taken is reported as an error instead
    of overwriting the existing file. Files with an invalid JSON line are moved
    to ``<directory>/errors``.

    Args:
        directory: Folder to process (``str`` or ``Path``).

    Returns:
        ``Path`` of the manifest file that was written.
    """
    directory = Path(directory)
    errors_dir = directory / "errors"
    errors_dir.mkdir(exist_ok=True)
    manifest_name = "training_manifest.json"

    manifest = []

    # Sorted so the manifest order is reproducible (os.listdir order is arbitrary).
    for filename in sorted(os.listdir(directory)):
        full_path = directory / filename

        # Skip hidden entries, folders, and the index written by a previous run.
        if filename.startswith(".") or full_path.is_dir() or filename == manifest_name:
            print(f"⏩ Skipping: {filename}")
            continue

        name, ext = os.path.splitext(filename)
        safe_name = name.lower().replace(" ", "_").replace("-", "_")
        if ext.lower() == ".txt":
            # Already carries the target extension: do not fold "txt" into the stem.
            new_filename = f"{safe_name}.txt"
        else:
            new_filename = f"{safe_name}{ext.replace('.', '')}.txt"
        destination_path = directory / new_filename
        line_number = 0

        try:
            # Rename first
            if full_path != destination_path:
                if destination_path.exists():
                    # os.rename would silently replace the existing file on POSIX.
                    raise FileExistsError(f"target '{new_filename}' already exists")
                os.rename(full_path, destination_path)
                print(f"✅ Renamed: '{filename}' → '{destination_path.name}'")

            # Only validate if it's a JSONL-like filename
            if "jsonl" in new_filename:
                with open(destination_path, "r", encoding="utf-8") as f:
                    lines = f.readlines()
                for line_number, line in enumerate(lines, start=1):
                    json.loads(line)  # raises if invalid

                manifest.append({
                    "filename": destination_path.name,
                    "path": str(destination_path),
                    "status": "valid",
                    "lines": len(lines)
                })

        except json.JSONDecodeError as e:
            print(f"❌ Invalid JSON in '{new_filename}': Line {line_number}: {e}")
            # Move invalid file to errors folder
            broken_path = errors_dir / destination_path.name
            destination_path.rename(broken_path)
            manifest.append({
                "filename": destination_path.name,
                "path": str(broken_path),
                "status": f"invalid JSON: {e}",
                "lines": 0
            })

        except Exception as e:
            # Best effort: record the failure and continue with the next file.
            print(f"❌ Unexpected error for '{filename}': {e}")
            manifest.append({
                "filename": filename,
                "path": str(full_path),
                "status": f"error: {e}",
                "lines": 0
            })

    # Save manifest
    manifest_path = directory / manifest_name
    with open(manifest_path, "w", encoding="utf-8") as mf:
        json.dump(manifest, mf, indent=2)

    print(f"\n📁 Manifest saved to: {manifest_path}")
    return manifest_path

# ✅ Run it
sanitize_and_index_training_files("/content/drive/MyDrive/Harmony360/trainingfiles")


In [None]:
import os, json

# Create the directory if it doesn't exist
output_dir = "/content/drive/MyDrive/Harmony360/trainingfiles"
os.makedirs(output_dir, exist_ok=True)

# Sample version 2 dataset with 10 entries
v2_dataset = [
    {
        "instruction": "Explain the Harmony360 resonance equation.",
        "input": "R_{369}(f) = A sin(3θ) + B cos(6θ) + C sin(9θ)",
        "output": "This equation captures the harmonic frequencies rooted in the 3–6–9 principle. Each term reflects oscillatory modes in a fractal resonance system mapped to fundamental energy behaviors in Harmony360."
    },
    {
        "instruction": "Define the concept of a Time Node in Harmony360.",
        "input": "What is a Time Node?",
        "output": "A Time Node is a quantized anchor point in the Harmony360 framework that corresponds to a temporal harmonic resonance. Time Nodes mark shifts in phase cycles and define intersections of fractal time lattices."
    },
    {
        "instruction": "Describe the use of the Golden Ratio in Harmony360.",
        "input": "Why is φ important?",
        "output": "φ (the Golden Ratio) is used to scale fractal geometries and harmonic amplitudes. It creates recursive self-similarity and is a foundational constant across Harmony360's equations, especially in fractal wave design."
    },
    {
        "instruction": "Explain how energy transfer is modeled.",
        "input": "P(t) = E ⋅ R_{369}(f) ⋅ cos(2πt/T)",
        "output": "This formula calculates power modulation using fractal resonance. R_{369}(f) adds time-phase alignment, while cos(2πt/T) ensures cyclical energy transfer. Used in Harmony360's wireless energy models."
    },
    {
        "instruction": "What is Zero-Point Consciousness?",
        "input": "Z_c = lim(t→∞) C_{360}(t) ⋅ e^{-kt}",
        "output": "Zero-Point Consciousness is the asymptotic harmonic state of consciousness evolution. As time increases, transient noise decays and pure harmonic awareness emerges. It reflects peak alignment with the universal resonance field."
    }
]

# Append a tagged copy of each of the first five records, giving 10 examples.
v2_dataset.extend(
    {**record, "input": f"{record['input']} [Variation {number}]"}
    for number, record in enumerate(v2_dataset[:5], start=1)
)

# Write the records as JSON Lines: one JSON object per line.
v2_path = os.path.join(output_dir, "harmony360_training_v2.jsonl")
with open(v2_path, "w") as f:
    f.writelines(json.dumps(item) + "\n" for item in v2_dataset)

print("✅ File recreated at:", v2_path)


In [None]:
# Load the v2 JSONL file from the training folder as a single "train" split.
dataset_2 = load_dataset("json", split="train", data_files="/content/drive/MyDrive/Harmony360/trainingfiles/harmony360_training_v2.jsonl")


In [None]:
# Build the prompt column from `dataset_2` (the v2 file loaded in the previous
# cell) so the tokenization and training cells that follow operate on the v2
# data rather than re-mapping the earlier v1 `dataset`.
dataset = dataset_2.map(lambda x: {
    "text": f"### Instruction:\n{x['instruction']}\n\n### Input:\n{x['input']}\n\n### Output:\n{x['output']}"
})


In [None]:
model_name = "EleutherAI/gpt-neo-125M"

# Load the base checkpoint and its tokenizer.
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Reuse the EOS token for padding and propagate that choice to the model.
tokenizer.pad_token = tokenizer.eos_token
model.resize_token_embeddings(len(tokenizer))
model.config.pad_token_id = tokenizer.pad_token_id


In [None]:
# Tokenize record by record, truncating / padding every prompt to 512 tokens.
tokenized_dataset = dataset.map(
    lambda record: tokenizer(record["text"], max_length=512, padding="max_length", truncation=True)
)


In [None]:
# Gen-1 run configuration; checkpoints go straight to the Drive model folder.
training_args = TrainingArguments(
    # where to write
    output_dir="/content/drive/MyDrive/Harmony360/harmony360-gen1-model",
    overwrite_output_dir=True,
    logging_dir="./logs",
    report_to="none",
    # schedule
    num_train_epochs=4,
    per_device_train_batch_size=2,
    warmup_steps=5,
    # optimisation
    learning_rate=5e-5,
    weight_decay=0.01,
    # bookkeeping
    logging_steps=5,
    save_steps=10,
    save_total_limit=2,
)

# Collator for causal language modelling (masked-LM disabled).
data_collator = DataCollatorForLanguageModeling(mlm=False, tokenizer=tokenizer)


In [None]:
# `processing_class` replaces Trainer's deprecated `tokenizer` keyword, which
# emits a FutureWarning and is scheduled for removal in Transformers 5.
# It needs transformers >= 4.46, which the `pip install -U transformers` cell provides.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    processing_class=tokenizer,
    data_collator=data_collator
)

trainer.train()


In [None]:
trainer.save_model("/content/drive/MyDrive/Harmony360/harmony360-gen1-model")
tokenizer.save_pretrained("/content/drive/MyDrive/Harmony360/harmony360-gen1-model")


In [None]:
from transformers import pipeline

# Text-generation pipeline over the gen-1 model saved on Drive.
generator = pipeline(
    "text-generation",
    tokenizer=tokenizer,
    model="/content/drive/MyDrive/Harmony360/harmony360-gen1-model",
)

# Ask the model to explain the resonance equation; generate up to 100 new tokens.
response = generator(
    (
        "### Instruction:\nExplain the equation\n\n"
        "### Input:\nR_{369}(f) = A sin(3θ) + B cos(6θ) + C sin(9θ)\n\n"
        "### Output:"
    ),
    max_new_tokens=100,
)
print(response[0]["generated_text"])


In [None]:
import os, json

# Define save path
v3_path = "/content/drive/MyDrive/Harmony360/harmony360_training_v3.jsonl"
os.makedirs(os.path.dirname(v3_path), exist_ok=True)

# High-quality Harmony360 training samples
entries = [
    {
        "instruction": "Explain the Harmony360 resonance equation.",
        "input": "R_{369}(f) = A sin(3θ) + B cos(6θ) + C sin(9θ)",
        "output": "This equation represents fractal resonance based on the 3-6-9 principle. Each harmonic term—sin(3θ), cos(6θ), and sin(9θ)—corresponds to layers of energy modulation that create a unified waveform through harmonic amplification and interference. It is central to Harmony360's energy field models."
    },
    {
        "instruction": "Explain the fractal mapping function in Harmony360.",
        "input": "FHM(x, y) = (1+φ)sin(3x(1+φ)/2) + cos(6πy)",
        "output": "FHM stands for Fractal Harmonic Mapping. This function blends spatial coordinates (x, y) with golden ratio (φ) and Pi (π) to describe how fractal energy propagates in multidimensional space. It enables visualization of harmonic densities across space-time."
    },
    {
        "instruction": "Explain the equation for energy-consciousness equivalence in Harmony360.",
        "input": "E = mc² ⋅ φ ⋅ 1/137",
        "output": "This modified mass-energy equation integrates the golden ratio (φ) and the fine-structure constant (1/137), suggesting a quantum link between physical mass, energy, and harmonic consciousness fields."
    },
    {
        "instruction": "Explain the consciousness evolution integral.",
        "input": "C_{360}(t) = ∫ f_b(t) ⋅ g_s(t) ⋅ r_h(t) dt",
        "output": "This equation models evolving consciousness as the product of brainwave frequency (f_b), sacred geometry scaling (g_s), and resonant harmonic patterns (r_h). It integrates over time to quantify spiritual and neurological evolution within Harmony360."
    },
    {
        "instruction": "Explain the resonance-modulated Schumann integration.",
        "input": "S(f) = Σ(1/n²) ⋅ R_{369}(f) ⋅ FHM(x, y)",
        "output": "This formulation embeds Schumann resonance harmonics into the fractal grid, layering global electromagnetic field behavior with 3-6-9 resonance and golden-pi geometry through fractal mapping."
    }
]

# Five passes over the base entries give 25 records: the first pass keeps them
# unchanged, each later pass tags the input with its variant number.
entries_full = [
    dict(entry) if round_index == 0
    else {**entry, "input": f"{entry['input']}  [Variant {round_index + 1}]"}
    for round_index in range(5)
    for entry in entries
]

# Write the records as JSON Lines: one JSON object per line.
with open(v3_path, "w", encoding="utf-8") as f:
    f.writelines(json.dumps(item) + "\n" for item in entries_full)

print(f"✅ File created: {v3_path}")


In [None]:
from datasets import load_dataset

# Load the v3 JSONL file as one split, then attach the formatted prompt as "text".
dataset = load_dataset("json", split="train", data_files="/content/drive/MyDrive/Harmony360/harmony360_training_v3.jsonl")

dataset = dataset.map(
    lambda row: {
        "text": "\n\n".join([
            f"### Instruction:\n{row['instruction']}",
            f"### Input:\n{row['input']}",
            f"### Output:\n{row['output']}",
        ])
    }
)



In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM

model_name = "EleutherAI/gpt-neo-125M"

# Tokenizer first: padding reuses the end-of-sequence token.
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

# Fresh base model, sized to the tokenizer and told which id is padding.
model = AutoModelForCausalLM.from_pretrained(model_name)
model.resize_token_embeddings(len(tokenizer))
model.config.pad_token_id = tokenizer.pad_token_id


In [None]:
# Per-record tokenization of the "text" column, fixed to 512 tokens by
# truncation and max-length padding.
tokenized_dataset = dataset.map(
    lambda row: tokenizer(
        row["text"],
        max_length=512,
        truncation=True,
        padding="max_length",
    )
)


In [None]:
from transformers import TrainingArguments, Trainer, DataCollatorForLanguageModeling

# Gen-2 run configuration; checkpoints go to the gen-2 model folder on Drive.
training_args = TrainingArguments(
    output_dir="/content/drive/MyDrive/Harmony360/harmony360-gen2-model",
    overwrite_output_dir=True,
    per_device_train_batch_size=2,
    num_train_epochs=4,
    learning_rate=5e-5,
    weight_decay=0.01,
    warmup_steps=5,
    save_steps=10,
    save_total_limit=2,
    logging_steps=5,
    logging_dir="./logs",
    report_to="none",
)

# Causal-LM collator: masked-language-modelling is switched off.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)


In [None]:
# `processing_class` replaces Trainer's deprecated `tokenizer` keyword, which
# emits a FutureWarning and is scheduled for removal in Transformers 5.
# It needs transformers >= 4.46, which the `pip install -U transformers` cell provides.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    processing_class=tokenizer,
    data_collator=data_collator
)

trainer.train()


In [None]:
trainer.save_model("/content/drive/MyDrive/Harmony360/harmony360-gen2-model")
tokenizer.save_pretrained("/content/drive/MyDrive/Harmony360/harmony360-gen2-model")


In [None]:
training_folder = "/content/drive/MyDrive/Harmony360/trainingfiles/"


In [None]:
import os
training_folder = "/content/drive/MyDrive/Harmony360/trainingfiles"

for f in os.listdir(training_folder):
    print(f)


In [None]:
import os
from datasets import load_dataset

def find_training_file(keyword: str, directory: str, extension=".jsonl") -> str:
    """
    Return the path of the first entry in ``directory`` whose name contains
    ``keyword`` (case-insensitive) and ends with ``extension``.

    Names starting with "." or containing "checkpoint" or "gdoc" are ignored.
    Raises FileNotFoundError when nothing qualifies.
    """
    needle = keyword.lower()
    for file in os.listdir(directory):
        lowered = file.lower()
        # Must match both the keyword and the extension ...
        if needle not in lowered or not file.endswith(extension):
            continue
        # ... and must not be a hidden file, a checkpoint or a Google Docs stub.
        if file.startswith(".") or "checkpoint" in lowered or "gdoc" in lowered:
            continue
        print(f"✅ Found file for '{keyword}': {file}")
        return os.path.join(directory, file)
    raise FileNotFoundError(f"❌ No valid file found containing '{keyword}' in {directory}")

# 📁 Set the training directory
training_folder = "/content/drive/MyDrive/Harmony360/trainingfiles"

# 📦 Datasets to load with their identifying keywords
# (display label -> keyword that must appear in the file name)
datasets_info = {
    "Philosophy": "philosophy",
    "Technical": "technical",
    "Consciousness": "consciousness",
    "Financial": "financial_cycles",
    "Key Optimization": "key_optimizer"
}

# Successfully loaded datasets, keyed by display label.
loaded_datasets = {}

for label, keyword in datasets_info.items():
    try:
        path = find_training_file(keyword, training_folder, extension=".jsonl")
        # NOTE: this rebinds the notebook-level name `dataset` on every iteration.
        dataset = load_dataset("json", data_files=path, split="train")
        loaded_datasets[label] = dataset

        # Short report: size and first record.
        print(f"\n🧠 {label} Dataset Loaded — Total Samples: {len(dataset)}")
        print("🔍 First sample:")
        print(dataset[0])
        print("-" * 60)

    except Exception as e:
        # Best effort: report the failure and continue with the next label.
        print(f"⚠️ Failed to load {label} dataset: {e}")


In [None]:
file_path = "/content/drive/MyDrive/Harmony360/trainingfiles/harmony360_god_man_consciousness.txt"

# Read raw bytes so no decoding is attempted, then show the first 10 lines as
# numbered byte-string reprs, each clipped to 100 bytes.
with open(file_path, "rb") as f:
    lines = f.readlines()

for number, raw_line in enumerate(lines[:10], start=1):
    print(f"{number:02d}: {raw_line[:100]!r}")


In [None]:
def try_load_json_dataset(keyword, directory):
    """Locate the training file for ``keyword`` in ``directory`` and load it as JSON.

    Returns the loaded "train" split, or ``None`` after printing the reason
    when the file cannot be found or loaded.
    """
    try:
        file_path = find_training_file(keyword, directory)
        print(f"📂 Loading: {file_path}")
        loaded = load_dataset("json", data_files=file_path, split="train")
    except Exception as e:
        # Best effort: report and signal the failure with None.
        print(f"❌ Skipping '{keyword}':", str(e))
        return None
    return loaded

dataset_consciousness = try_load_json_dataset("consciousness", training_folder)


In [None]:
def find_training_file(keyword: str, directory: str) -> str:
    """Return the path of the first ``.jsonl`` entry in ``directory`` whose name
    contains ``keyword`` (case-insensitive); raise FileNotFoundError otherwise."""
    wanted = keyword.lower()
    match = next(
        (name for name in os.listdir(directory) if wanted in name.lower() and name.endswith(".jsonl")),
        None,
    )
    if match is None:
        raise FileNotFoundError(f"No file containing '{keyword}' found.")
    return os.path.join(directory, match)

# Example
print(find_training_file("philosophy", training_folder))


In [None]:
import os
from datasets import load_dataset

def find_training_file(keyword: str, directory: str) -> str:
    """
    Find the training file in ``directory`` whose name contains ``keyword``
    (case-insensitive) and ends with ``.txt``.

    When several files match, the alphabetically first name is returned, so
    repeated runs pick the same file.

    Args:
        keyword: Substring to look for in the file name.
        directory: Folder to search (not recursive).

    Returns:
        Full path of the matching file.

    Raises:
        FileNotFoundError: If no file matches.
    """
    needle = keyword.lower()
    # os.listdir() returns entries in arbitrary order; sort for a reproducible pick.
    for file in sorted(os.listdir(directory)):
        if needle in file.lower() and file.endswith(".txt"):
            return os.path.join(directory, file)
    raise FileNotFoundError(f"No file found containing '{keyword}' in {directory}")

# 🔧 Set the training directory
training_folder = "/content/drive/MyDrive/Harmony360/trainingfiles"

# ✅ Load each dataset by keyword: find_training_file() picks the file and the
# "json" builder parses it. Nothing is caught here, so a missing file raises
# FileNotFoundError and stops the cell before the success message.
dataset_philosophy = load_dataset("json", data_files=find_training_file("philosophy", training_folder), split="train")
dataset_technical = load_dataset("json", data_files=find_training_file("technical", training_folder), split="train")
dataset_consciousness = load_dataset("json", data_files=find_training_file("consciousness", training_folder), split="train")
dataset_financial = load_dataset("json", data_files=find_training_file("financial_cycles", training_folder), split="train")

print("✅ All datasets loaded successfully.")


In [None]:
def format_entry(example):
    """Build the training prompt for one record.

    Reads ``instruction``, ``input`` and ``output`` from ``example`` and returns
    ``{"text": ...}`` with the three sections, each under its ``###`` header and
    separated by a blank line.
    """
    sections = (
        ("### Instruction:", example["instruction"]),
        ("### Input:", example["input"]),
        ("### Output:", example["output"]),
    )
    return {"text": "\n\n".join(f"{header}\n{body}" for header, body in sections)}

# Apply formatting to each
dataset_philosophy = dataset_philosophy.map(format_entry)
dataset_technical = dataset_technical.map(format_entry)
dataset_consciousness = dataset_consciousness.map(format_entry)
dataset_financial = dataset_financial.map(format_entry)


In [None]:
from datasets import concatenate_datasets

# Combine all formatted Harmony360 datasets
harmony360_dataset = concatenate_datasets([
    dataset_philosophy,
    dataset_technical,
    dataset_consciousness,
    dataset_financial
])


In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM

model_name = "EleutherAI/gpt-neo-125M"

# Base model and tokenizer for the combined-dataset run.
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Pad with the EOS token and record its id on the model config.
tokenizer.pad_token = tokenizer.eos_token
model.resize_token_embeddings(len(tokenizer))
model.config.pad_token_id = tokenizer.eos_token_id


In [None]:
def tokenize_function(example):
    """Tokenize ``example["text"]`` with the notebook-level ``tokenizer``,
    truncating and padding to 512 tokens."""
    return tokenizer(example["text"], max_length=512, padding="max_length", truncation=True)

tokenized_dataset = harmony360_dataset.map(tokenize_function, batched=True)


In [None]:
def tokenize_function(example):
    """Tokenize ``example["text"]`` and attach causal-LM ``labels``.

    Sequences are truncated / padded to 512 tokens with the notebook-level
    ``tokenizer``. ``labels`` mirror ``input_ids`` except at padded positions
    (attention mask 0), which are set to -100 so the loss ignores them rather
    than training the model to predict padding.

    Works for a single example or a batch (list of texts). If the tokenizer
    returns no attention mask, labels fall back to a plain copy of ``input_ids``.
    """
    tokenized = tokenizer(
        example["text"],
        truncation=True,
        padding="max_length",
        max_length=512
    )
    input_ids = tokenized["input_ids"]
    attention_mask = tokenized.get("attention_mask")

    def _mask_padding(ids, mask):
        # -100 is the label value ignored by the language-modelling loss.
        return [token if keep else -100 for token, keep in zip(ids, mask)]

    if attention_mask is None:
        labels = input_ids.copy()
    elif input_ids and isinstance(input_ids[0], (list, tuple)):
        # Batched call: one id list per example.
        labels = [_mask_padding(ids, mask) for ids, mask in zip(input_ids, attention_mask)]
    else:
        labels = _mask_padding(input_ids, attention_mask)

    tokenized["labels"] = labels
    return tokenized

tokenized_dataset = harmony360_dataset.map(tokenize_function, batched=True)


In [None]:
# Dedicated arguments for the combined-dataset run. Without them this cell
# would pick up whatever `training_args` an earlier cell left behind (the gen-2
# configuration) and write its checkpoints into that model's Drive folder.
# Hyper-parameters mirror the gen-2 configuration; only output_dir differs.
# NOTE(review): the output folder is taken from the checkpoint path that the
# later "continue training" cell resumes from — confirm it is the intended one.
training_args = TrainingArguments(
    output_dir="/content/harmony360-unified-model",
    overwrite_output_dir=True,
    num_train_epochs=4,
    per_device_train_batch_size=2,
    save_steps=10,
    save_total_limit=2,
    logging_steps=5,
    learning_rate=5e-5,
    warmup_steps=5,
    weight_decay=0.01,
    logging_dir="./logs",
    report_to="none"
)

# `processing_class` replaces Trainer's deprecated `tokenizer` keyword
# (requires transformers >= 4.46). No data collator is passed: the tokenized
# dataset already carries its own `labels` column.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    processing_class=tokenizer
)

trainer.train()


In [None]:
!pip install -q PyMuPDF


In [None]:
import fitz  # PyMuPDF

def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page of the PDF at ``pdf_path``.

    The document is opened with PyMuPDF (``fitz``) and is always closed again,
    even when extracting a page raises.
    """
    doc = fitz.open(pdf_path)
    try:
        # Join once instead of growing a string page by page.
        return "".join(page.get_text() for page in doc)
    finally:
        doc.close()

# Set your PDF path
pdf_path = "/content/drive/MyDrive/Harmony360/trainingfiles/Analysis of Financial Cycle Studies and Market Predictions.pdf"
pdf_text = extract_text_from_pdf(pdf_path)

print(f"✅ Extracted {len(pdf_text)} characters from the PDF.")


In [None]:
import json
from pathlib import Path

def chunk_and_export_jsonl(
    master_text,
    chunk_size=1000,
    base_filename="financial_cycles",
    output_dir="/content/drive/MyDrive/Harmony360/trainingfiles",
    instruction="Learn from this Harmony360 financial cycles document chunk.",
):
    """
    Split a long text into word-count chunks and save each chunk as its own
    one-record JSONL training file.

    Args:
        master_text: Text to split; words are separated on any whitespace.
        chunk_size: Maximum number of words per chunk (must be >= 1).
        base_filename: Prefix of the generated files
            (``<base_filename>_chunk_<n>.jsonl``, numbered from 1).
        output_dir: Folder that receives the files; created if missing.
        instruction: Value stored in each record's ``instruction`` field.

    Returns:
        List of the written file paths (as strings), in chunk order. Empty
        when ``master_text`` contains no words.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be >= 1, got {chunk_size}")

    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    words = master_text.split()
    file_paths = []

    # Step through the word list one chunk at a time; the last chunk may be shorter.
    for index, start in enumerate(range(0, len(words), chunk_size), start=1):
        chunk_text = " ".join(words[start:start + chunk_size]).strip()

        entry = {
            "instruction": instruction,
            "input": "",
            "output": chunk_text
        }

        filepath = output_dir / f"{base_filename}_chunk_{index}.jsonl"

        with open(filepath, "w", encoding="utf-8") as f:
            json.dump(entry, f)
            f.write("\n")

        print(f"✅ Saved: {filepath}")
        file_paths.append(str(filepath))

    return file_paths

# Now chunk and export
chunked_files = chunk_and_export_jsonl(pdf_text, chunk_size=1000, base_filename="harmony360_financial_cycles")


In [None]:
file = "/content/drive/MyDrive/Harmony360/trainingfiles/harmony360_financial_cycles_chunk_*.jsonl"


In [None]:
# ✅ Imports
from datasets import load_dataset, concatenate_datasets
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments, DataCollatorForLanguageModeling
import glob

# ✅ Point to your checkpoint directory
# NOTE(review): this path lives under /content rather than on Drive; presumably
# it does not survive a runtime reset — confirm the checkpoint exists before running.
checkpoint_path = "/content/harmony360-unified-model/checkpoint-1760"  # Replace with actual checkpoint folder

# ✅ Load model & tokenizer from the same checkpoint folder
tokenizer = AutoTokenizer.from_pretrained(checkpoint_path)
model = AutoModelForCausalLM.from_pretrained(checkpoint_path)

# ✅ Use EOS as the pad token. The assignment is unconditional, so it replaces
# any pad token stored with the checkpoint.
tokenizer.pad_token = tokenizer.eos_token
model.resize_token_embeddings(len(tokenizer))
model.config.pad_token_id = tokenizer.eos_token_id

# ✅ Load and combine every financial-cycles chunk file found on Drive
jsonl_files = sorted(glob.glob("/content/drive/MyDrive/Harmony360/trainingfiles/harmony360_financial_cycles_chunk_*.jsonl"))
if not jsonl_files:
    # Fail early with a clear message instead of an opaque error from
    # concatenate_datasets([]) further down.
    raise FileNotFoundError(
        "No harmony360_financial_cycles_chunk_*.jsonl files found in "
        "/content/drive/MyDrive/Harmony360/trainingfiles"
    )
datasets = [load_dataset("json", data_files=chunk_path, split="train") for chunk_path in jsonl_files]
combined_dataset = concatenate_datasets(datasets)

# ✅ Format the data into instruction format
formatted_dataset = combined_dataset.map(
    lambda x: {
        "text": f"### Instruction:\n{x['instruction']}\n\n### Input:\n{x['input']}\n\n### Output:\n{x['output']}"
    }
)

# ✅ Tokenize in batches and drop the original text columns so only the
# tokenizer outputs remain.
tokenized_dataset = formatted_dataset.map(
    lambda x: tokenizer(x["text"], truncation=True, padding="max_length", max_length=512),
    batched=True,
    remove_columns=formatted_dataset.column_names
)

# ✅ Training Arguments
training_args = TrainingArguments(
    output_dir="/content/harmony360-model",
    overwrite_output_dir=True,
    per_device_train_batch_size=1,
    num_train_epochs=4,
    logging_steps=10,
    save_steps=200,
    save_total_limit=2,
    logging_dir="/content/logs",
    report_to="none"  # Disable W&B for now
)

# ✅ Trainer
# `processing_class` replaces Trainer's deprecated `tokenizer` keyword, which
# emits a FutureWarning and is scheduled for removal in Transformers 5
# (requires transformers >= 4.46).
trainer = Trainer(
    model=model,
    processing_class=tokenizer,
    args=training_args,
    train_dataset=tokenized_dataset,
    data_collator=DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
)

# 🚀 Continue training
trainer.train()


In [None]:
import json
from pathlib import Path

# ✅ Harmony360 Unified Theories Chunk
chunk_text = """
Fractal Resonance and Unified Theories of Reality: A Comparative Analysis
Introduction
The quest to understand reality, energy, and consciousness has produced a spectrum of theories across physics and beyond. From established frameworks like quantum field theory (QFT) and general relativity to emerging ideas like fractal resonance (Harmony360), researchers seek a unified description of the universe. This paper presents a deep technical discussion comparing multiple theories of reality—including the Harmony360 model—through their core principles, mathematical foundations, and empirical support. We aim for the rigor of a scientific journal while remaining accessible to interdisciplinary readers. By exploring synergies between string theory, loop quantum gravity, simulation hypothesis, unified field theories, AI-based models, financial fractal models, and natural resonances (Schumann and cosmological rhythms), we highlight where these frameworks align or diverge. We derive key equations (with mathematical rigor) and propose experiments to test novel predictions, especially those of Harmony360’s Fractal Resonance Theory. Finally, we discuss challenges and open questions, providing a roadmap for future interdisciplinary research.

1. Core Principles of Competing Theories
Harmony360: Fractal Resonance Theory and the 3-6-9 Principle
Harmony360 is an emerging framework positing that reality is governed by fractal resonance across all scales. Its core principles draw on self-similarity, vibration, and harmonic alignment. A central concept is Tesla’s “3-6-9 principle,” inspired by Nikola Tesla’s famous assertion: “If you only knew the magnificence of the 3, 6 and 9, then you would have a key to the universe.” In Harmony360, the numbers 3, 6, 9 are seen as fundamental harmonics or symmetries in nature, related to vortex mathematics and repeating geometric patterns. The theory envisions the universe as an iterative fractal – structures from subatomic particles to galaxies repeat similar patterns, each level governed by resonant frequencies that align in a harmonious way.

At the heart of Harmony360 is a “Fractal Resonance” principle: every system (an atom, a cell, a planet, a galaxy) maintains coherence via resonance with larger and smaller scales. This resonates with the idea of a universal frequency lattice or musical scale of the cosmos. The framework introduces a mathematical model (the Lumin Equation) to quantify coherence and energy flow at the i-th fractal level. One form of this equation is:
C_i = M φ^i ∑[sin(ω_j t + S) + cos(ω_j t + φ)] + ψ(i) + P + feedback(i) + m c^2
where C_i represents system coherence at level i, M is an organizing principle or “mind-like” factor, φ^i (with φ≈1.618 the golden ratio) introduces harmonic scaling, sin and cos terms model oscillatory dynamics, ψ(i) is a resonance coupling function, P a polarity-balancing term, and m c^2 a restorative energy term. This equation encodes Harmony360’s view that golden ratio scaling and feedback loops yield stability across scales, reflecting nature’s fractal patterns in biology and cosmology. In essence, Harmony360 merges metaphysical concepts (coherence, “mind” as an organizing field) with physical analogues (gravity-like attraction, resonant oscillations) to propose a unified description of matter, life, and consciousness.

The 3-6-9 principle in this context amplifies certain resonant modes – for example, the theory might assign 3, 6, 9 as key nodes in a harmonic series or geometric cycle that recurs through scale transitions. By integrating Tesla’s numerological insight, Harmony360 suggests these numbers relate to fundamental fractional frequencies or phase angles that maximize coherence. This unique blend of fractals, Fibonacci sequences, and resonance is claimed to bridge physical forces and conscious experience in one framework.
"""

# Target: a one-record JSONL file in the Drive training folder (created if missing).
output_dir = Path("/content/drive/MyDrive/Harmony360/trainingfiles")
output_dir.mkdir(parents=True, exist_ok=True)

filename = "harmony360_unified_theories_chunk_1.jsonl"
filepath = output_dir / filename

# The whole chunk, stripped of surrounding whitespace, becomes the "output" field.
entry = dict(
    instruction="Learn from this Harmony360 comparative theoretical physics document chunk.",
    input="",
    output=chunk_text.strip(),
)

with open(filepath, "w", encoding="utf-8") as f:
    f.write(json.dumps(entry) + "\n")

print(f"✅ Saved to: {filepath}")


In [None]:
# Step 1: Install & Import Libraries
!pip install transformers datasets --quiet

from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments, DataCollatorForLanguageModeling
from datasets import load_dataset
import torch

# Step 2: Load the JSONL dataset
dataset = load_dataset(
    "json",
    data_files="/content/drive/MyDrive/Harmony360/trainingfiles/harmony360_unified_theories_chunk_1.jsonl",
    split="train"
)

# Step 3: Format the dataset into prompts
def format_prompt(example):
    """Render one instruction/input/output record as a single prompt string.

    Args:
        example: Mapping with "instruction", "input" and "output" keys.

    Returns:
        A dict with one key, "text", holding the three sections under
        "### Instruction:", "### Input:" and "### Output:" headers, separated
        by blank lines.
    """
    sections = (
        ("Instruction", example["instruction"]),
        ("Input", example["input"]),
        ("Output", example["output"]),
    )
    text = "\n\n".join(f"### {title}:\n{body}" for title, body in sections)
    return {"text": text}

dataset = dataset.map(format_prompt)

# Step 4: Load tokenizer and model (re-use your base or latest checkpoint)
model_name = "gpt2"  # Or your latest custom model path
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Reuse EOS as the padding token so padding="max_length" below has a pad id to use.
tokenizer.pad_token = tokenizer.eos_token
model = AutoModelForCausalLM.from_pretrained(model_name)
model.resize_token_embeddings(len(tokenizer))

# Step 5: Tokenize
def tokenize(example):
    """Tokenize a batch of prompts to fixed-length sequences of 512 tokens.

    Called through ``dataset.map(..., batched=True)``, so ``example["text"]``
    is a list of strings. Shorter prompts are padded to 512 tokens; anything
    past 512 tokens is dropped by ``truncation=True``.
    """
    return tokenizer(example["text"], truncation=True, padding="max_length", max_length=512)

tokenized_dataset = dataset.map(tokenize, batched=True)

# Step 6: Setup TrainingArguments
training_args = TrainingArguments(
    output_dir="/content/harmony360-model-unified-theories",
    per_device_train_batch_size=1,
    num_train_epochs=4,
    logging_steps=10,
    save_strategy="epoch",
    fp16=torch.cuda.is_available(),  # mixed precision only when a GPU is present
    overwrite_output_dir=True
)

# Step 7: Setup Trainer
# `processing_class` is the current name for what used to be passed as
# `tokenizer=`; the old keyword is deprecated in recent transformers releases.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    data_collator=DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False),
    processing_class=tokenizer
)

# Step 8: Train
trainer.train()


In [None]:
# STEP 1: Create .jsonl training file from pasted text
import json
from pathlib import Path

harmony360_chunk_text = """Holographic Mass? - Physics Stack Exchange)), noting that treating everything as a black hole is problematic and that his predictions often lack independent confirmation. However, Haramein’s work is notable in the context of this discussion for its fusion of fractals, geometry, and physics – very much a spirit shared by Harmony360. Both assert a scalable harmony: Haramein via geometric packing and black hole analogues, Harmony360 via golden ratio scaling and resonance...
"""  # Truncated for example. Use your full input here.

# Instruction-tuning record: empty "input", whitespace-trimmed excerpt as "output".
chunk_data = dict(
    instruction="Train on this Harmony360 excerpt covering unified field theories, AI-based modeling, and consciousness in financial markets.",
    input="",
    output=harmony360_chunk_text.strip(),
)

# `chunk_path` stays a plain string; the training cell further down passes it
# to load_dataset(data_files=...).
chunk_path = "/content/drive/MyDrive/Harmony360/trainingfiles/harmony360_unified_theories_chunk_2.jsonl"
with open(chunk_path, mode="w", encoding="utf-8") as f:
    # One JSON object plus a trailing newline is one JSONL line.
    f.write(json.dumps(chunk_data) + "\n")

print(f"✅ Saved: {chunk_path}")


In [None]:
# ✅ Define the save path (the same directory the later training cells load from)
save_path = "/content/harmony360-model-unified-theories"

# ✅ Save model, tokenizer, and config (including special tokens)
model.save_pretrained(save_path)
tokenizer.save_pretrained(save_path)

# ✅ If you're using TrainingArguments, save through the Trainer too (optional)
# The tokenizer files were already written above, so the deprecated
# `trainer.tokenizer` accessor is not used here.
# NOTE(review): assumes `trainer` was built with this same `tokenizer` object,
# as in the training cells of this notebook.
trainer.save_model(save_path)


In [None]:
# STEP 2: Import Hugging Face tools
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments, DataCollatorForLanguageModeling
from datasets import load_dataset
import torch

# STEP 3: Load the chunk written to `chunk_path` as the "train" split
dataset = load_dataset(path="json", split="train", data_files=chunk_path)

# STEP 4: Format dataset
def format_prompt(example):
    """Build the training prompt for one record.

    Args:
        example: Mapping with "instruction", "input" and "output" keys.

    Returns:
        ``{"text": prompt}`` where ``prompt`` lists the three fields under
        "### Instruction:", "### Input:" and "### Output:" headers with a
        blank line between sections.
    """
    instruction_part = f"### Instruction:\n{example['instruction']}"
    input_part = f"### Input:\n{example['input']}"
    output_part = f"### Output:\n{example['output']}"
    prompt = instruction_part + "\n\n" + input_part + "\n\n" + output_part
    return {"text": prompt}

dataset = dataset.map(format_prompt)

# STEP 5: Load tokenizer and model
model_path = "/content/harmony360-model-unified-theories"  # latest checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Reuse EOS as the padding token so padding="max_length" below has a pad id to use.
tokenizer.pad_token = tokenizer.eos_token
model = AutoModelForCausalLM.from_pretrained(model_path)

# STEP 6: Tokenize
def tokenize(example):
    """Tokenize a batch of prompts to fixed-length sequences of 512 tokens.

    Called through ``dataset.map(..., batched=True)``, so ``example["text"]``
    is a list of strings. Shorter prompts are padded to 512 tokens; anything
    past 512 tokens is dropped by ``truncation=True``.
    """
    return tokenizer(example["text"], truncation=True, padding="max_length", max_length=512)

tokenized_dataset = dataset.map(tokenize, batched=True)

# STEP 7: Setup training config
training_args = TrainingArguments(
    output_dir="/content/harmony360-model-unified-theories",  # continues training here
    per_device_train_batch_size=1,
    num_train_epochs=4,
    logging_steps=10,
    save_strategy="epoch",
    fp16=torch.cuda.is_available(),  # mixed precision only when a GPU is present
    overwrite_output_dir=False
)

# `processing_class` is the current name for what used to be passed as
# `tokenizer=`; the old keyword is deprecated in recent transformers releases.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    processing_class=tokenizer,
    data_collator=DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
)

# STEP 8: Start Training
trainer.train()

In [None]:
# STEP 1: Save your chunked training data
# NOTE(review): everything between the triple quotes is passed to
# save_jsonl_from_text() below and stored (after .strip()) as the record's
# "output". The bracketed placeholder line is part of the string, so it will
# end up in the training file unless it is replaced with the full chunk.
chunk_text = """
From a modeling standpoint, interdisciplinary researchers have tried to incorporate psychology and even neurology into economics...
[✂️ truncated here to save space in this message – but include your FULL CHUNK as the value of `chunk_text` above]
"""

import json
from pathlib import Path

def save_jsonl_from_text(
    text,
    output_path,
    instruction="Learn from this Harmony360 chunk covering Schumann Resonance, markets, and fractal models.",
):
    """Write ``text`` as a single instruction-tuning record to a JSONL file.

    The file is overwritten and contains exactly one line: a JSON object with
    "instruction", an empty "input", and ``text`` stripped of surrounding
    whitespace as "output".

    Args:
        text: Raw chunk text.
        output_path: Destination file (str or Path). Missing parent
            directories are created.
        instruction: Instruction stored in the record. Defaults to the
            wording this notebook originally hard-coded.
    """
    entry = {
        "instruction": instruction,
        "input": "",
        "output": text.strip()
    }
    output_path = Path(output_path)
    # Create the target folder first so open() does not fail with
    # FileNotFoundError when it does not exist yet.
    output_path.parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(entry, f)
        f.write("\n")
    print(f"✅ Saved: {output_path}")

# Destination for this chunk; `jsonl_path` is also read by the training cell that follows.
jsonl_path = "/content/drive/MyDrive/Harmony360/trainingfiles/harmony360_chunk_7.jsonl"
save_jsonl_from_text(text=chunk_text, output_path=jsonl_path)


In [None]:
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments

# Load dataset
dataset = load_dataset("json", data_files=jsonl_path, split="train")
dataset = dataset.map(lambda x: {
    "text": f"### Instruction:\n{x['instruction']}\n\n### Input:\n{x['input']}\n\n### Output:\n{x['output']}"
})

# Tokenizer & Model Loading
model_path = "/content/harmony360-model-unified-theories"
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Reuse EOS as the padding token so padding="max_length" below has a pad id to use.
tokenizer.pad_token = tokenizer.eos_token
model = AutoModelForCausalLM.from_pretrained(model_path)

# Tokenize dataset (defined once; the dataset is mapped a single time)
def tokenize(example):
    """Tokenize a batch of prompts and attach causal-LM labels.

    Called through ``dataset.map(..., batched=True)``, so ``example["text"]``
    is a list of strings. Each prompt is truncated or padded to 512 tokens.
    Labels mirror ``input_ids`` except at padded positions, which are set to
    -100 so the loss skips them. The attention mask is used for this rather
    than the pad id, because the pad token is the EOS token here.
    """
    tokens = tokenizer(example["text"], truncation=True, padding="max_length", max_length=512)
    tokens["labels"] = [
        [token_id if keep else -100 for token_id, keep in zip(ids, mask)]
        for ids, mask in zip(tokens["input_ids"], tokens["attention_mask"])
    ]
    return tokens

tokenized_dataset = dataset.map(tokenize, batched=True)

# Training args
training_args = TrainingArguments(
    output_dir=model_path,
    overwrite_output_dir=True,
    per_device_train_batch_size=1,
    num_train_epochs=4,
    logging_steps=1,
    save_steps=4,
    save_total_limit=2,
    prediction_loss_only=True
)

# Trainer
# `processing_class` is the current name for what used to be passed as
# `tokenizer=`; the old keyword is deprecated in recent transformers releases.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    processing_class=tokenizer
)

# Train
trainer.train()


In [None]:
!pip install reflex


In [None]:
# harmony360_core.py

from math import pi, sin
from typing import List

class Harmony360Math:
    """Static numeric helpers for the Harmony360 resonance tools.

    Every method is a ``@staticmethod``; ``PHI`` is the golden ratio
    ``(1 + sqrt(5)) / 2``.
    """

    PHI = (1 + 5 ** 0.5) / 2

    @staticmethod
    def fibonacci(n: int) -> List[int]:
        """Return the first ``n`` Fibonacci numbers (0, 1, 1, 2, 3, ...).

        A non-positive ``n`` yields an empty list. Without the guard,
        ``n == -1`` would return ``[0]`` because of the negative slice.
        """
        if n <= 0:
            return []
        seq = [0, 1]
        while len(seq) < n:
            seq.append(seq[-1] + seq[-2])
        # The slice only matters for n == 1, where the seed list is one too long.
        return seq[:n]

    @staticmethod
    def harmonic_resonance(n: int) -> float:
        """Return ``(3*PHI**n + 6*pi**n + 9*(PHI*pi)**(n/2)) / 2``.

        Every term is positive, so when the powers exceed the float range the
        result is reported as ``inf`` rather than raising ``OverflowError``.
        """
        phi = Harmony360Math.PHI
        try:
            return (3 * phi**n + 6 * pi**n + 9 * (phi * pi)**(n / 2)) / 2
        except OverflowError:
            return float("inf")

    @staticmethod
    def fibonacci_resonance(n: int) -> float:
        """Combine the last three of the first ``n`` Fibonacci numbers.

        With ``f = fibonacci(n)`` the value is
        ``PHI**f[-1] + pi**f[-2] + (PHI*pi)**(f[-3]/2)``.
        Returns 0.0 when fewer than three terms exist (``n < 3``), and ``inf``
        when the powers overflow the float range. Fibonacci exponents grow
        quickly, so that already happens at ``n == 18``.
        """
        f = Harmony360Math.fibonacci(n)
        if len(f) < 3:
            return 0.0
        phi = Harmony360Math.PHI
        try:
            return phi**f[-1] + pi**f[-2] + (phi * pi)**(f[-3] / 2)
        except OverflowError:
            return float("inf")

    @staticmethod
    def cycle_map(n: int) -> str:
        """Map ``n % 3`` (0, 1, 2) to ``"3"``, ``"6"``, ``"9"`` respectively."""
        return ["3", "6", "9"][n % 3]


In [None]:
# harmony360_app.py

import reflex as rx
from harmony360_core import Harmony360Math

class Harmony360State(rx.State):
    """State for the resonance toolset page.

    ``n`` is the input value; the other fields hold the results written by
    the most recent ``calculate()`` call.
    """

    n: int = 1
    fibonacci_seq: str = ""
    harmonic_result: float = 0.0
    fibonacci_result: float = 0.0
    cycle_result: str = ""

    def calculate(self):
        """Recompute all four result fields from the current ``n``."""
        n = self.n
        sequence = Harmony360Math.fibonacci(n)
        # The sequence is shown as comma-separated text.
        self.fibonacci_seq = ", ".join(str(term) for term in sequence)
        self.harmonic_result = Harmony360Math.harmonic_resonance(n)
        self.fibonacci_result = Harmony360Math.fibonacci_resonance(n)
        self.cycle_result = Harmony360Math.cycle_map(n)

def index():
    """Build the page: heading, blurb, input for ``n``, Calculate button, then four label/value pairs."""
    children = [
        rx.heading("🔮 Harmony360 Resonance Toolset"),
        rx.text("Explore Fibonacci, Resonance & 3-6-9 harmonics."),
        rx.input(on_change=Harmony360State.set_n, placeholder="Enter n", type_="number"),
        rx.button("Calculate", on_click=Harmony360State.calculate),
    ]
    # Each result is a label text followed by a text bound to the state field.
    labelled_results = (
        ("Fibonacci Sequence: ", Harmony360State.fibonacci_seq),
        ("Harmonic Resonance: ", Harmony360State.harmonic_result),
        ("Fibonacci Resonance: ", Harmony360State.fibonacci_result),
        ("3-6-9 Cycle Output: ", Harmony360State.cycle_result),
    )
    for label, value in labelled_results:
        children.append(rx.text(label))
        children.append(rx.text(value))
    return rx.vstack(*children, spacing="3")

# Create the Reflex app and register the page.
app = rx.App()
app.add_page(index)
# Older Reflex releases needed an explicit compile step. Newer releases are
# understood to no longer expose `app.compile()`, so call it only when the
# installed version still has it.
if hasattr(app, "compile"):
    app.compile()


In [None]:
# Runs `reflex init` with the blank template, copies the two modules into
# harmony360_ui/, changes into that directory, and runs `reflex run`.
# NOTE(review): the `!cp` lines need harmony360_core.py and harmony360_app.py
# to exist as files in the working directory. The two cells above begin with a
# `# <name>.py` comment but, as shown, contain no `%%writefile` (or other) step
# that writes them to disk. Confirm how these files are meant to be created.
!reflex init harmony360_ui --template blank
!cp harmony360_core.py harmony360_ui/harmony360_core.py
!cp harmony360_app.py harmony360_ui/harmony360_app.py
%cd harmony360_ui
# NOTE(review): presumably `reflex run` keeps running as a server and so occupies this cell. TODO confirm.
!reflex run
