In [1]:
%run ~/Prompt_Engineering/setup_github.py

SETTING UP GITHUB-READY PROJECT STRUCTURE

1. Directories created

2. Notebooks:
   Copied 01_Dataset_Preparation.ipynb -> training/01_Dataset_Preparation.ipynb
   Copied 02_Model_Training.ipynb -> training/02_Model_Training.ipynb

3. Dataset:
   Copied train.jsonl
   Copied val.jsonl
   Copied test.jsonl

4. Report:
   Copied report/technical_report.pdf -> docs/

5. Results check:
   ✅ results/config_a/final_model/adapter_model.safetensors
   ✅ results/config_a/training_logs.json
   ✅ results/config_b/training_logs.json
   ✅ results/config_c/training_logs.json
   ✅ results/config_d/training_logs.json
   ✅ results/evaluation_metrics.json
   ✅ results/evaluation_charts.png
   ✅ results/hp_comparison_chart.png
   ✅ results/error_analysis.json
   ✅ results/zero_shot_results.json
   ✅ results/few_shot_results.json
   ✅ results/finetuned_results.json

6. Source files check:
   ❌ MISSING - upload this file! src/rag_pipeline.py
   ❌ MISSING - upload this file! src/inference_pipeline.py
   ❌ M

In [2]:
# Create src/ files by copying from where you uploaded them
import os

# Check where the files actually landed
for name in ["rag_pipeline.py", "inference_pipeline.py", "rag_app.py"]:
    for loc in [f"/home/naik.vat/Prompt_Engineering/{name}", 
                f"/home/naik.vat/{name}",
                f"/home/naik.vat/Prompt_Engineering/src/{name}"]:
        if os.path.exists(loc):
            print(f"Found: {loc}")

Found: /home/naik.vat/Prompt_Engineering/rag_pipeline.py
Found: /home/naik.vat/Prompt_Engineering/inference_pipeline.py


In [3]:
import shutil, os

proj = os.path.expanduser("~/Prompt_Engineering")

# Move src files to correct location
os.makedirs(f"{proj}/src", exist_ok=True)
shutil.copy2(f"{proj}/rag_pipeline.py", f"{proj}/src/rag_pipeline.py")
shutil.copy2(f"{proj}/inference_pipeline.py", f"{proj}/src/inference_pipeline.py")

# Create rag_app.py (was never uploaded)
rag_app_code = '''"""
RAG-Enhanced Gradio Demo
Run with: python src/rag_app.py (from project root, requires GPU)
"""
import json
import gradio as gr
from rag_pipeline import RAGPolicyGenerator, detect_services, get_relevant_actions

generator = RAGPolicyGenerator("results/config_a/final_model")

def generate_with_ui(description, max_tokens, temperature, use_rag):
    services = detect_services(description)
    services_str = ", ".join(services) if services else "None detected"
    rag_context = get_relevant_actions(services, description) if use_rag and services else ""
    result = generator.generate(description, max_tokens=int(max_tokens), temperature=temperature, use_rag=use_rag)
    if result["valid_json"]:
        policy_str = json.dumps(result["policy"], indent=2)
        status = "Valid IAM Policy"
    else:
        policy_str = result["raw_output"]
        status = "Invalid JSON - may need manual correction"
    info = f"Services detected: {services_str}\\nRAG context: {\\'Injected\\' if rag_context else \\'None\\'}"
    return policy_str, status, info

demo = gr.Interface(
    fn=generate_with_ui,
    inputs=[
        gr.Textbox(label="Describe the IAM policy you need", lines=3),
        gr.Slider(minimum=128, maximum=1024, value=512, step=64, label="Max Tokens"),
        gr.Slider(minimum=0.0, maximum=1.0, value=0.1, step=0.05, label="Temperature"),
        gr.Checkbox(value=True, label="Enable RAG"),
    ],
    outputs=[
        gr.Code(label="Generated IAM Policy", language="json"),
        gr.Textbox(label="Status"),
        gr.Textbox(label="RAG Info"),
    ],
    title="AWS IAM Policy Generator (RAG-Enhanced)",
    examples=[
        ["Allow read-only access to S3 bucket named customer-data", 512, 0.1, True],
        ["Allow a Lambda function to read from DynamoDB and write logs to CloudWatch", 512, 0.1, True],
        ["Deny all S3 delete operations across all buckets", 512, 0.1, True],
    ],
)

if __name__ == "__main__":
    demo.launch(share=True)
'''

with open(f"{proj}/src/rag_app.py", "w") as f:
    f.write(rag_app_code)

# Create .gitignore
with open(f"{proj}/.gitignore", "w") as f:
    f.write("""__pycache__/
*.py[cod]
.ipynb_checkpoints/
results/config_*/checkpoint-*/
wandb/
iam-finetuning/dataset/raw/
iam-finetuning/dataset/synthetic_batches/
.env
.venv/
.aws/
.DS_Store
*.tar.gz
*.log
""")

# Create .gitattributes
with open(f"{proj}/.gitattributes", "w") as f:
    f.write("*.safetensors filter=lfs diff=lfs merge=lfs -text\n")

# Create LICENSE
with open(f"{proj}/LICENSE", "w") as f:
    f.write("""MIT License

Copyright (c) 2026 Vatsal Naik

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
""")

# Verify everything
print("Final check:")
for f in ["src/rag_pipeline.py", "src/inference_pipeline.py", "src/rag_app.py",
          ".gitignore", ".gitattributes", "LICENSE", "README.md", "requirements.txt"]:
    exists = os.path.exists(os.path.join(proj, f))
    print(f"  {'✅' if exists else '❌'} {f}")

Final check:
  ✅ src/rag_pipeline.py
  ✅ src/inference_pipeline.py
  ✅ src/rag_app.py
  ✅ .gitignore
  ✅ .gitattributes
  ✅ LICENSE
  ✅ README.md
  ✅ requirements.txt


In [4]:
import os
proj = os.path.expanduser("~/Prompt_Engineering")
for f in ["rag_pipeline.py", "inference_pipeline.py", "rag_app.py"]:
    root_copy = os.path.join(proj, f)
    if os.path.exists(root_copy):
        os.remove(root_copy)
        print(f"Deleted {f} from root")

Deleted rag_pipeline.py from root
Deleted inference_pipeline.py from root


In [5]:
%%bash
cd ~/Prompt_Engineering
git init
git add .
git status

hint: Using 'master' as the name for the initial branch. This default branch name
hint: is subject to change. To configure the initial branch name to use in all
hint: 
hint: 	git config --global init.defaultBranch <name>
hint: 
hint: Names commonly chosen instead of 'master' are 'main', 'trunk' and
hint: 'development'. The just-created branch can be renamed via this command:
hint: 
hint: 	git branch -m <name>


Initialized empty Git repository in /home/naik.vat/Prompt_Engineering/.git/
On branch master

No commits yet

Changes to be committed:
  (use "git rm --cached <file>..." to unstage)
	new file:   .gitattributes
	new file:   .gitignore
	new file:   01_Dataset_Preparation.ipynb
	new file:   02_Model_Training.ipynb
	new file:   Demo.ipynb
	new file:   Demo2.ipynb
	new file:   LICENSE
	new file:   README.md
	new file:   Untitled.ipynb
	new file:   data/processed/test.jsonl
	new file:   data/processed/train.jsonl
	new file:   data/processed/val.jsonl
	new file:   docs/technical_report.pdf
	new file:   iam-finetuning/dataset/processed/all_validated_pairs.json
	new file:   iam-finetuning/dataset/processed/test.jsonl
	new file:   iam-finetuning/dataset/processed/train.jsonl
	new file:   iam-finetuning/dataset/processed/val.jsonl
	new file:   inference_app.py
	new file:   rag_app.ipynb
	new file:   report/technical_report.pdf
	new file:   requirements.txt
	new file:   results/comparison_examples

In [6]:
%%bash
cd ~/Prompt_Engineering

# Unstage everything first
git reset

# Remove junk files from tracking
git rm --cached -f Demo.ipynb Demo2.ipynb Untitled.ipynb test_model.ipynb rag_app.ipynb 2>/dev/null
git rm --cached -f setup_github.py inference_app.py 2>/dev/null
git rm --cached -f 01_Dataset_Preparation.ipynb 02_Model_Training.ipynb 2>/dev/null
git rm --cached -rf iam-finetuning/ report/ 2>/dev/null
git rm --cached -f results/config_b/final_model/adapter_model.safetensors 2>/dev/null
git rm --cached -f results/config_c/final_model/adapter_model.safetensors 2>/dev/null
git rm --cached -f results/config_d/final_model/adapter_model.safetensors 2>/dev/null

# Add junk files to .gitignore
cat >> .gitignore << 'EOF'
Demo.ipynb
Demo2.ipynb
Untitled.ipynb
test_model.ipynb
rag_app.ipynb
setup_github.py
inference_app.py
01_Dataset_Preparation.ipynb
02_Model_Training.ipynb
iam-finetuning/
report/
results/config_b/final_model/adapter_model.safetensors
results/config_c/final_model/adapter_model.safetensors
results/config_d/final_model/adapter_model.safetensors
EOF

# Now add only what we want
git add .
git status

On branch master

No commits yet

Changes to be committed:
  (use "git rm --cached <file>..." to unstage)
	new file:   .gitattributes
	new file:   .gitignore
	new file:   LICENSE
	new file:   README.md
	new file:   data/processed/test.jsonl
	new file:   data/processed/train.jsonl
	new file:   data/processed/val.jsonl
	new file:   docs/technical_report.pdf
	new file:   requirements.txt
	new file:   results/comparison_examples.json
	new file:   results/config_a/README.md
	new file:   results/config_a/final_model/README.md
	new file:   results/config_a/final_model/adapter_config.json
	new file:   results/config_a/final_model/adapter_model.safetensors
	new file:   results/config_a/final_model/tokenizer.json
	new file:   results/config_a/final_model/tokenizer_config.json
	new file:   results/config_a/final_model/training_args.bin
	new file:   results/config_a/summary.json
	new file:   results/config_a/training_logs.json
	new file:   results/config_b/README.md
	new file:   results/config_b/f

In [8]:
%%bash
cd ~/Prompt_Engineering
git config user.email "naik.vat@northeastern.edu"
git config user.name "Vatsal Naik"
git commit -m "Fine-tuned Mistral-7B for AWS IAM policy generation with RAG"
git branch -M main
git push -u origin main

[main (root-commit) 9f2d04e] Fine-tuned Mistral-7B for AWS IAM policy generation with RAG
 51 files changed, 1111389 insertions(+)
 create mode 100644 .gitattributes
 create mode 100644 .gitignore
 create mode 100644 LICENSE
 create mode 100644 README.md
 create mode 100644 data/processed/test.jsonl
 create mode 100644 data/processed/train.jsonl
 create mode 100644 data/processed/val.jsonl
 create mode 100644 docs/technical_report.pdf
 create mode 100644 requirements.txt
 create mode 100644 results/comparison_examples.json
 create mode 100644 results/config_a/README.md
 create mode 100644 results/config_a/final_model/README.md
 create mode 100644 results/config_a/final_model/adapter_config.json
 create mode 100644 results/config_a/final_model/adapter_model.safetensors
 create mode 100644 results/config_a/final_model/tokenizer.json
 create mode 100644 results/config_a/final_model/tokenizer_config.json
 create mode 100644 results/config_a/final_model/training_args.bin
 create mode 100644


error: unable to read askpass response from '/usr/libexec/openssh/gnome-ssh-askpass'
fatal: could not read Username for 'https://github.com': No such device or address


CalledProcessError: Command 'b'cd ~/Prompt_Engineering\ngit config user.email "naik.vat@northeastern.edu"\ngit config user.name "Vatsal Naik"\ngit commit -m "Fine-tuned Mistral-7B for AWS IAM policy generation with RAG"\ngit branch -M main\ngit push -u origin main\n'' returned non-zero exit status 128.

In [10]:
%%bash
cd ~/Prompt_Engineering
git remote set-url origin https://github.com/naik-vatsal/iam-policy-generator.git
echo "URL cleaned - token removed"

URL cleaned - token removed


In [11]:
%%bash
cd ~/Prompt_Engineering
echo "*.safetensors" >> .gitignore
git rm --cached results/config_a/final_model/adapter_model.safetensors
git add .gitignore
git commit -m "Remove large model file, add to gitignore"

rm 'results/config_a/final_model/adapter_model.safetensors'
[main 01fb696] Remove large model file, add to gitignore
 2 files changed, 1 insertion(+)
 delete mode 100644 results/config_a/final_model/adapter_model.safetensors


In [13]:
%%bash
cd ~/Prompt_Engineering
git remote set-url origin https://github.com/naik-vatsal/iam-policy-generator.git
echo "Token removed from config"

Token removed from config
