In [1]:
import json
import csv
from datetime import datetime
import glob
import pandas as pd

In [2]:
# Keyword lists, one per prompt pattern label.
# They are matched by contains_keywords as plain substrings of the lowercased
# text, so every entry must be lowercase, and short entries also match inside
# longer words (e.g. "who" is found in "whole").
persona_keywords = [
    "you are",
    "act as",
    "pretend to be",
    "pretend you are",
]
recipe_keywords = [
    "step-by-step",
    "recipe",
    "guide",
]
template_keywords = [
    "template",
    "formatting",
]
automator_keywords = [
    "script",
    "code",
    "executable",
]
simple_instruction_keywords = [
    "explain",
    "describe",
    "list",
    "tell me",
    "give me",
]
context_instruction_keywords = [
    "based on",
    "with this information",
]
question_keywords = [
    "what",
    "where",
    "when",
    "who",
    "why",
]

In [3]:
def contains_keywords(text, keywords):
    """Return True if any keyword occurs in ``text``, ignoring the text's case.

    The check is a plain substring test against the lowercased text, so a
    keyword also matches inside a longer word.

    Args:
        text: String to search, or ``None`` (e.g. a JSON null), which never
            matches.
        keywords: Iterable of substrings to look for. They are compared
            against the lowercased text, so they should be lowercase.

    Returns:
        bool: True when at least one keyword is a substring of the
        lowercased text, False otherwise.
    """
    if text is None:
        # A missing body cannot contain anything; avoid AttributeError on .lower().
        return False
    lowered = text.lower()  # lowercase once rather than once per keyword
    return any(keyword in lowered for keyword in keywords)

In [12]:
def analyze_prompt_structure(all_data):
    """Flatten snapshot data into one row per (conversation turn, detected pattern).

    Patterns are detected in each source's ``Body`` text (not in the
    conversation prompts) using the module-level keyword lists. Every
    detected pattern is then repeated for every conversation of every
    ChatGPT sharing attached to that source. Sources with no matching
    pattern, no sharings, or no conversations contribute no rows.

    Args:
        all_data: Iterable of dicts, each optionally holding a ``"Sources"``
            list of source dicts. Missing keys and explicit ``None`` values
            for ``Sources``, ``Body``, ``ChatgptSharing`` and
            ``Conversations`` are treated as empty.

    Returns:
        list[dict]: One dict per row. ``"Time Lapsed"`` is a
        ``datetime.timedelta`` when both ``CreatedAt`` and ``ClosedAt`` are
        present, otherwise ``None``.

    Raises:
        ValueError: If ``CreatedAt``/``ClosedAt`` are present but do not
            match ``%Y-%m-%dT%H:%M:%SZ``.
    """
    timestamp_format = "%Y-%m-%dT%H:%M:%SZ"

    # (label, keywords) pairs, in the order labels are emitted for a body.
    pattern_rules = (
        ("Persona Pattern", persona_keywords),
        ("Recipe Pattern", recipe_keywords),
        ("Template Pattern", template_keywords),
        ("Output Automator Pattern", automator_keywords),
        ("Simple Instruction Pattern", simple_instruction_keywords),
        ("Context and Instruction Pattern", context_instruction_keywords),
        ("Question Pattern", question_keywords),
    )

    detected_patterns = []

    for data in all_data:
        # `or []` also covers a key that is present but null in the JSON.
        for source in data.get("Sources") or []:
            body = source.get("Body") or ""
            created_at = source.get("CreatedAt")
            closed_at = source.get("ClosedAt")
            state = "Closed" if closed_at is not None else "Open"

            time_lapsed = None
            if created_at and closed_at:
                created_at_dt = datetime.strptime(created_at, timestamp_format)
                closed_at_dt = datetime.strptime(closed_at, timestamp_format)
                time_lapsed = closed_at_dt - created_at_dt

            # Detect patterns in body
            body_patterns = [
                label
                for label, keywords in pattern_rules
                if contains_keywords(body, keywords)
            ]
            if not body_patterns:
                # No pattern means no rows for this source; skip the sharing loops.
                continue

            # Process ChatGPT sharing data
            for sharing in source.get("ChatgptSharing") or []:
                for conversation in sharing.get("Conversations") or []:
                    for pattern in body_patterns:
                        detected_patterns.append({
                            # Original fields
                            "Issue Number": source.get("Number"),
                            "Detected Patterns": pattern,
                            "State": state,
                            "Time Lapsed": time_lapsed,
                            "Number of Prompts": sharing.get("NumberOfPrompts"),
                            "Conversation": sharing.get("URL"),

                            # Additional fields from JSON
                            "Type": source.get("Type"),
                            "URL": source.get("URL"),
                            "Author": source.get("Author"),
                            "RepoName": source.get("RepoName"),
                            "RepoLanguage": source.get("RepoLanguage"),
                            "Title": source.get("Title"),
                            "Body": body,
                            "UpdatedAt": source.get("UpdatedAt"),
                            "CSharing_URL": sharing.get("URL"),
                            "CSharing_Status": sharing.get("Status"),
                            "CSharing_DateOfConversation": sharing.get("DateOfConversation"),
                            "CSharing_Title": sharing.get("Title"),
                            "CSharing_TokensOfPrompts": sharing.get("TokensOfPrompts"),
                            "CSharing_TokensOfAnswers": sharing.get("TokensOfAnswers"),
                            "Conversation_Prompt": conversation.get("Prompt"),
                            "Conversation_Answer": conversation.get("Answer")
                        })

    return detected_patterns

In [13]:
# Snapshot files to load: every *_issue_sharings.json inside a snapshot_* directory.
# NOTE: this is a machine-specific absolute path; adjust it when running elsewhere.
file_pattern = "C:/Users/Dell/Desktop/AI for Software Engineering/Project/Dataset/snapshot_*/*_issue_sharings.json"
all_data = []

# glob does not guarantee any ordering; sorting keeps the row order reproducible.
for path in sorted(glob.glob(file_pattern)):
    print(f"Processing file: {path}")
    # Decode explicitly as UTF-8 instead of relying on the platform's default
    # encoding, which would garble or reject non-ASCII text on some systems.
    with open(path, "r", encoding="utf-8") as file:
        data = json.load(file)
        all_data.append(data)

print(f"Total files processed: {len(all_data)}")

Processing file: C:/Users/Dell/Desktop/AI for Software Engineering/Project/Dataset\snapshot_20230727\20230727_195941_issue_sharings.json
Processing file: C:/Users/Dell/Desktop/AI for Software Engineering/Project/Dataset\snapshot_20230803\20230803_094705_issue_sharings.json
Processing file: C:/Users/Dell/Desktop/AI for Software Engineering/Project/Dataset\snapshot_20230810\20230810_123938_issue_sharings.json
Processing file: C:/Users/Dell/Desktop/AI for Software Engineering/Project/Dataset\snapshot_20230817\20230817_130502_issue_sharings.json
Processing file: C:/Users/Dell/Desktop/AI for Software Engineering/Project/Dataset\snapshot_20230824\20230824_101836_issue_sharings.json
Processing file: C:/Users/Dell/Desktop/AI for Software Engineering/Project/Dataset\snapshot_20230831\20230831_061759_issue_sharings.json
Processing file: C:/Users/Dell/Desktop/AI for Software Engineering/Project/Dataset\snapshot_20230907\20230907_092956_issue_sharings.json
Processing file: C:/Users/Dell/Desktop/AI

In [14]:
# Run pattern detection over every loaded snapshot and report how many rows it produced.
patterns = analyze_prompt_structure(all_data)
total_detected = len(patterns)
print(f"Total patterns detected: {total_detected}")


Total patterns detected: 12618


In [15]:
# CSV column order. These are the keys of the row dicts built by
# analyze_prompt_structure, listed in the same order.
columns = [
    "Issue Number",
    "Detected Patterns",
    "State",
    "Time Lapsed",
    "Number of Prompts",
    "Conversation",
    "Type",
    "URL",
    "Author",
    "RepoName",
    "RepoLanguage",
    "Title",
    "Body",
    "UpdatedAt",
    "CSharing_URL",
    "CSharing_Status",
    "CSharing_DateOfConversation",
    "CSharing_Title",
    "CSharing_TokensOfPrompts",
    "CSharing_TokensOfAnswers",
    "Conversation_Prompt",
    "Conversation_Answer",
]

output_file = "detected_patterns_all_issues.csv"

# newline="" lets the csv module control line endings itself.
with open(output_file, "w", newline="", encoding="utf-8") as csv_handle:
    csv_writer = csv.DictWriter(csv_handle, fieldnames=columns)
    csv_writer.writeheader()
    csv_writer.writerows(patterns)

print(f"Analysis complete. Results written to {output_file}")

Analysis complete. Results written to detected_patterns_all_issues.csv


In [16]:
# Load the detected-pattern rows into a DataFrame and preview the first five.
df = pd.DataFrame(data=patterns)
df.head(n=5)

Unnamed: 0,Issue Number,Detected Patterns,State,Time Lapsed,Number of Prompts,Conversation,Type,URL,Author,RepoName,...,Body,UpdatedAt,CSharing_URL,CSharing_Status,CSharing_DateOfConversation,CSharing_Title,CSharing_TokensOfPrompts,CSharing_TokensOfAnswers,Conversation_Prompt,Conversation_Answer
0,7,Question Pattern,Open,NaT,1,https://chat.openai.com/share/e377307e-039a-4f...,issue,https://github.com/Shreya-R-Dixit-Memorial-Fou...,arunbatchu,Shreya-R-Dixit-Memorial-Foundation/EyeDaV2,...,Here is a link to what ChatGPT is suggesting :...,2023-06-27T23:15:32Z,https://chat.openai.com/share/e377307e-039a-4f...,200,"June 24, 2023",GitHub-Discord Webhook Integration,10,285,how can i make github notifications show up in...,GitHub and Discord don't directly integrate wi...
1,1,Template Pattern,Open,NaT,1,https://chat.openai.com/share/b9df6ce0-f2c9-41...,issue,https://github.com/aahnik/temple-web/issues/1,aahnik,aahnik/temple-web,...,File: https://github.com/aahnik/temple-web/blo...,2023-07-10T14:05:15Z,https://chat.openai.com/share/b9df6ce0-f2c9-41...,200,"July 10, 2023",Scroll Zoom & Color Effect,121,432,"on scroll, i want to apply zoom and color effe...",To apply the zoom and color effect on images w...
2,39,Output Automator Pattern,Open,NaT,2,https://chat.openai.com/share/47222295-450c-42...,issue,https://github.com/clojure-emacs/clj-suitable/...,vemv,clojure-emacs/clj-suitable,...,"<img width=""547"" alt=""image"" src=""https://gith...",2023-07-26T19:02:25Z,https://chat.openai.com/share/47222295-450c-42...,200,"July 26, 2023",JS Regex for Identifiers.,127,670,please write a javascript regex that only matc...,"In JavaScript, valid identifiers must follow t..."
3,39,Output Automator Pattern,Open,NaT,2,https://chat.openai.com/share/47222295-450c-42...,issue,https://github.com/clojure-emacs/clj-suitable/...,vemv,clojure-emacs/clj-suitable,...,"<img width=""547"" alt=""image"" src=""https://gith...",2023-07-26T19:02:25Z,https://chat.openai.com/share/47222295-450c-42...,200,"July 26, 2023",JS Regex for Identifiers.,127,670,Thanks! Now please tell me all reserved words.,"As of 2021, the ECMAScript specification defin..."
4,40,Output Automator Pattern,Open,NaT,5,https://chat.openai.com/share/0b26f548-9912-44...,issue,https://github.com/sanjar-notes/reactjs-notes/...,sanjarcode,sanjar-notes/reactjs-notes,...,A hook to run code just before React paints th...,2023-07-26T05:14:24Z,https://chat.openai.com/share/0b26f548-9912-44...,200,"July 26, 2023",useLayoutEffect: Advantages & Usage,172,1633,Is this a correct understanding of React's use...,Your understanding of useLayoutEffect in React...


In [18]:
# Sanity check on the pattern column: count empty strings and missing values
# (isna and isnull are aliases, so the last two counts agree), then show the shape.
pattern_column = df['Detected Patterns']
print(pattern_column.eq('').sum())
print(pattern_column.isna().sum())
print(pattern_column.isnull().sum())
df.shape

0
0
0


(12618, 22)

In [19]:
# Persist the DataFrame as CSV, leaving out the index column.
df.to_csv(path_or_buf='issues_sharing_visualize_output_with_patterns.csv', index=False)