## Import Libraries and Data

In [1]:
import os
from helper_funcs import get_tokens,get_headers,fetch_top_posts,fetch_last_posts,plotly_graphs
import pandas as pd 
import numpy as np
import ast
from sklearn.preprocessing import MultiLabelBinarizer
from xgboost import XGBClassifier
import plotly.express as px
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix,classification_report
import plotly.graph_objects as go

In [2]:
PRODUCT_HUNT_API_KEY = os.getenv("PRODUCT_HUNT_API_KEY")
PRODUCT_HUNT_API_SECRET = os.getenv('PRODUCT_HUNT_API_SECRET')

In [3]:
token, token_type = get_tokens()
headers = get_headers(token,token_type)

In [4]:
start_date="2024-08-01T00:00:00Z"
end_date = "2024-08-31T23:59:59Z"

In [5]:
top_posts = fetch_top_posts(start_date=start_date,end_date=end_date,headers=headers, limit =100)
last_posts = fetch_last_posts(start_date=start_date,end_date=end_date,headers=headers, limit =100)

In [6]:
print(f"Name :{top_posts[1]['node']['name']},\nDescription:{top_posts[1]['node']['description']},\nFirst comment:{top_posts[1]['node']['comments']['nodes'][0]['body']}")

Name :Me.bot,
Description:Me.bot captures and connects your thoughts to understand you better, synthesizing a coach for all your life challenges, from a big career move to a small gloomy moment.,
First comment:Hello, I’m Felix Tao, the CEO of Mindverse. Let me introduce <b>Me.bot</b> to you. It is an app I use everyday, for several hours!

Surrounded by centralized AI models, we believe in exploring a path where everyone can <b>train their own personal AI</b>. Everyone deserves an AI defined by them, not by a "Big Brother."

<b>Our solution</b>
Our product, Me.bot, is designed to be a <b>personalized AI companion</b>. It learns and evolves with you, coaching you based on your unique experiences and interactions. You can easily build your memory archive with Me.bot all-compassing multimodal recognition, and Me.bot will connect the dots of your memories to inspire and support you.

<b>Key features</b>
🌟<b>Serendipity</b>: Me.bot learns from you, offering inspiration and advice when you n

In [7]:
print(f"Name :{last_posts[2]['node']['name']},\nDescription:{last_posts[2]['node']['description']},\nFirst comment:{last_posts[2]['node']['comments']['nodes'][0]['body']}")

Name :Midjourney Prompt Guide Tutorial Ebook,
Description:Are you ready to take your digital art and content creation to the next level? 🌟 The Midjourney Prompt Guide Tutorial is packed with over 40,000 prompts and easy-to-follow tutorials,,
First comment:Are you looking to elevate your digital art and content creation? Our Midjourney Prompt Guide Tutorial offers everything you need to turn simple text into stunning visuals. Whether you're an artist, designer, or content creator, this eBook is packed with over 40,000 prompts and step-by-step tutorials to spark your creativity and streamline your workflow.


## EDA - Top Posts of August

In [8]:
top_posts_df = pd.DataFrame([top_posts[i]['node'] for i in range(len(top_posts))])
top_posts_df.head()

Unnamed: 0,name,description,url,votesCount,createdAt,tagline,commentsCount,comments,topics
0,Wordware (YC S24),Wordware is an IDE that enables anyone to buil...,https://www.producthunt.com/posts/wordware-yc-...,7467,2024-08-02T07:01:00Z,Your tool for building AI agents with natural ...,165,{'nodes': [{'body': '👋🏻 Hi Product Hunt makers...,"{'nodes': [{'slug': 'software-engineering'}, {..."
1,Me.bot,Me.bot captures and connects your thoughts to ...,https://www.producthunt.com/posts/me-bot-2?utm...,2664,2024-08-06T07:01:00Z,The inspiring companion for your life,430,"{'nodes': [{'body': 'Hello, I’m Felix Tao, the...","{'nodes': [{'slug': 'productivity'}, {'slug': ..."
2,10xlaunch,Just add our one-line script to your website a...,https://www.producthunt.com/posts/10xlaunch?ut...,1577,2024-08-27T07:01:00Z,Get 10x more users from same website traffic,206,"{'nodes': [{'body': 'Hey fam, Mo here from 10...","{'nodes': [{'slug': 'sales'}, {'slug': 'artifi..."
3,Flowith,Flowith is the AI for deep work. Surpassing tr...,https://www.producthunt.com/posts/flowith?utm_...,1474,2024-08-07T07:01:00Z,AI for deep work,109,{'nodes': [{'body': 'Hey Product Hunt communit...,"{'nodes': [{'slug': 'productivity'}, {'slug': ..."
4,Brainybear.ai,Build AI Chatbots in 3 Steps and Train in 3 Cl...,https://www.producthunt.com/posts/brainybear-a...,1370,2024-08-06T07:01:00Z,Train AI chatbots in 3 clicks and help custome...,108,"{'nodes': [{'body': 'Hey Hunters, I'm thrille...","{'nodes': [{'slug': 'messaging'}, {'slug': 'ar..."


In [9]:
top_posts_df['date'] = pd.to_datetime(top_posts_df['createdAt']).dt.date
top_posts_df['day'] = pd.to_datetime(top_posts_df['createdAt']).dt.day_name()
top_posts_df['topic_list'] = top_posts_df['topics'].apply(lambda x: [j['slug'] for j in x['nodes']])

In [10]:
plotly_graphs('histogram',top_posts_df['day'],title = 'Day of Launch')

Of the top performing posts of August, the common trend is to launch on Weekdays.

In [11]:
plotly_graphs('scatter',x=top_posts_df['day'],y=top_posts_df['votesCount'],
              title='Vote Count compared to day of Launch')

In [12]:
plotly_graphs('box',x=top_posts_df['day'],y=top_posts_df['votesCount'],
              title='Vote Count compared to day of Launch')

In [13]:
plotly_graphs('bar', 
              x=top_posts_df.groupby('day')['votesCount'].mean().sort_values(ascending=False).round(0).index,
              y=top_posts_df.groupby('day')['votesCount'].mean().sort_values(ascending=False).round(0),
              title='Average Vote Count vs Day of Launch')


In [14]:
top_posts_df.groupby('day')['votesCount'].mean().round(0).sort_values(ascending=False)

day
Friday       1191.0
Tuesday       877.0
Wednesday     778.0
Monday        736.0
Thursday      692.0
Saturday      638.0
Sunday        590.0
Name: votesCount, dtype: float64

In [15]:
top_posts_df[top_posts_df['votesCount']<7000].groupby('day')['votesCount'].mean().round(0).sort_values(ascending=False)

day
Tuesday      877.0
Wednesday    778.0
Monday       736.0
Thursday     692.0
Friday       668.0
Saturday     638.0
Sunday       590.0
Name: votesCount, dtype: float64

In [16]:
top_posts_df.groupby('day')['commentsCount'].mean().round(0).sort_values(ascending=False)

day
Tuesday      191.0
Wednesday    175.0
Monday       173.0
Thursday     172.0
Friday       158.0
Saturday     143.0
Sunday       128.0
Name: commentsCount, dtype: float64

In [18]:
plotly_graphs('bar', 
              x=top_posts_df.groupby('day')['commentsCount'].mean().sort_values(ascending=False).round(0).index,
              y=top_posts_df.groupby('day')['commentsCount'].mean().sort_values(ascending=False).round(0),
              title='Average comments Count vs Day of Launch')


**Few trends observed:**
* The average votes gained seems to be high on the weekdays rather than on Saturdays and Sundays.
* Also it's worth noting that launch of one startup heavily skewed the average votes data

*One of the reasons behind this trend may be that people like to enjoy their weekends and are not that active on Product Hunt.*

**Also typically, from observation, the launches that do well in the early hours or on the first day of the launch tend to do well throughout. One way to test if this observation holds true is to check the correlation between the votes gained within 24 hours to launch vs total votes gained. But this is not possible as the Product Hunt's GraphQL API only provides total votes at the moment**

In [22]:
plotly_graphs('bar',
              x=top_posts_df.explode('topic_list')['topic_list'].value_counts()[:10].index,
              y=top_posts_df.explode('topic_list')['topic_list'].value_counts()[:10],
              title = 'Topics of Top 100 launches of August'
       )

As expected most (64 of 100) of the top launches in August are related to the AI domain

***I have been following product Hunt for a while and one thing that stands out in top performing posts almost always has been the first comment from the maker.***

## EDA II

Let us take a look at the difference between launches that are voted the most and voted the least in the month of August

Here are a few things we can take a look at:
* The description of the product/service
* First comment by the maker: which provides a detailed overview of how the product works and other details

We can use the description to train a classification model to determine whether a launch will be successful based on the number of votes it gets

Although product matters the most when it comes to the votes it gets but how the product is marketed also matters

In [23]:
for i in range(len(top_posts_df[:5])):
    print(top_posts_df.loc[i,'description'])
    print()

Wordware is an IDE that enables anyone to build complex AI Agents and applications. Domain experts and engineers can now iterate 20x faster with prebuilt tools, API deployment, tracing, and more. Finally, build high-quality and reliable AI!

Me.bot captures and connects your thoughts to understand you better, synthesizing a coach for all your life challenges, from a big career move to a small gloomy moment.

Just add our one-line script to your website and know exactly who's visiting your website – get names, emails, and LinkedIn profiles of your anonymous website visitors automatically as soon as they land on your website.

Flowith is the AI for deep work. Surpassing traditional chat-based tools, it streamlines tasks on a multi-thread interface powered by a most advanced agent framework. The intuitive canvas and smart framework boost productivity, helping users stay in the flow.

Build AI Chatbots in 3 Steps and Train in 3 Clicks. Brainybear scans your website or uploaded files to del

In [24]:
last_posts_df = pd.DataFrame([last_posts[i]['node'] for i in range(len(top_posts))])
last_posts_df['date'] = pd.to_datetime(last_posts_df['createdAt']).dt.date
last_posts_df['day'] = pd.to_datetime(last_posts_df['createdAt']).dt.day_name()
last_posts_df['topic_list'] = last_posts_df['topics'].apply(lambda x: [j['slug'] for j in x['nodes']])
last_posts_df.head()

Unnamed: 0,name,description,url,votesCount,createdAt,tagline,commentsCount,comments,topics,date,day,topic_list
0,Tube Mastery and Monetization,Tube Mastery and Monetization is Matt Par's fl...,https://www.producthunt.com/posts/tube-mastery...,1,2024-08-06T01:34:04Z,Help anyone start & grow a successful YouTube ...,1,{'nodes': [{'body': 'If you're tired of trying...,"{'nodes': [{'slug': 'youtube'}, {'slug': 'mone...",2024-08-06,Tuesday,"[youtube, monetization, online-learning]"
1,ostoba.com,Create todo tasks with help of AI,https://www.producthunt.com/posts/ostoba-com?u...,1,2024-08-06T21:36:55Z,Streamline Tasks and Stay Organized with AI Po...,0,{'nodes': []},"{'nodes': [{'slug': 'productivity'}, {'slug': ...",2024-08-06,Tuesday,"[productivity, task-management, artificial-int..."
2,Midjourney Prompt Guide Tutorial Ebook,Are you ready to take your digital art and con...,https://www.producthunt.com/posts/midjourney-p...,1,2024-08-06T20:56:55Z,Get 40000+ Midjourney Prompts,1,{'nodes': [{'body': 'Are you looking to elevat...,"{'nodes': [{'slug': 'productivity'}, {'slug': ...",2024-08-06,Tuesday,"[productivity, artificial-intelligence, digita..."
3,liiist Platform,liiist solves the problem of information overl...,https://www.producthunt.com/posts/liiist-platf...,1,2024-08-06T20:00:05Z,The Ultimate World List Platform,0,{'nodes': []},"{'nodes': [{'slug': 'platformers'}, {'slug': '...",2024-08-06,Tuesday,"[platformers, shopping, social-networking]"
4,Option Payoff Chart,Calculate and visualize option payoffs,https://www.producthunt.com/posts/option-payof...,1,2024-08-07T07:01:00Z,Free option payoff visualiser,1,{'nodes': [{'body': 'This is a free option pay...,"{'nodes': [{'slug': 'analytics'}, {'slug': 'fi...",2024-08-07,Wednesday,"[analytics, finance, data-visualization]"


In [25]:
for i in range(len(last_posts_df[:5])):
    print(last_posts_df.loc[i,'description'])
    print()

Tube Mastery and Monetization is Matt Par's flagship course that reveals how he runs 12 YouTube channels and makes 7 figures from them. It's the best YouTube course and community on the market.

Create todo tasks with help of AI

Are you ready to take your digital art and content creation to the next level? 🌟 The Midjourney Prompt Guide Tutorial is packed with over 40,000 prompts and easy-to-follow tutorials,

liiist solves the problem of information overload by providing a single place to find everything you need, organized and ranked by relevance and uses a combination of AI, ML, and user feedback to generate personalized lists of the best results for any query.

Calculate and visualize option payoffs



In [26]:
top_posts_df.iloc[0]['comments']['nodes'][0]['body']

'👋🏻 Hi Product Hunt makers!\n\nI’m Kamil, Head of Growth at Wordware—an IDE for building AI agents. Today, we’re officially launching the Wordware platform, and we’re excited to show the world what we built.\n\nIt’s a tool (an IDE) that enables you to quickly build custom AI agents for specific use cases like legal contract generation, marketing content automation, invoice analysis, candidate screening, generating PRDs, and many more. We call applications built on Wordware ‘WordApps’ because you can create them using natural language—in other words, using words (pun intended).\n\nOur core belief is that the domain expert—not the engineer—knows what good LLM output looks like. For example, lawyers building legal SaaS need to be deeply involved in the process, and working directly in the codebase or going back-and-forth with engineers isn’t the way to go.\n\nMost of our clients are cross-functional teams, including less technical members, who need to collaborate with engineers on LLM app

In [27]:
print(top_posts_df.iloc[0]['comments']['nodes'][0]['body'])

👋🏻 Hi Product Hunt makers!

I’m Kamil, Head of Growth at Wordware—an IDE for building AI agents. Today, we’re officially launching the Wordware platform, and we’re excited to show the world what we built.

It’s a tool (an IDE) that enables you to quickly build custom AI agents for specific use cases like legal contract generation, marketing content automation, invoice analysis, candidate screening, generating PRDs, and many more. We call applications built on Wordware ‘WordApps’ because you can create them using natural language—in other words, using words (pun intended).

Our core belief is that the domain expert—not the engineer—knows what good LLM output looks like. For example, lawyers building legal SaaS need to be deeply involved in the process, and working directly in the codebase or going back-and-forth with engineers isn’t the way to go.

Most of our clients are cross-functional teams, including less technical members, who need to collaborate with engineers on LLM applications

In [28]:
print(top_posts_df.iloc[1]['comments']['nodes'][0]['body'])

Hello, I’m Felix Tao, the CEO of Mindverse. Let me introduce <b>Me.bot</b> to you. It is an app I use everyday, for several hours!

Surrounded by centralized AI models, we believe in exploring a path where everyone can <b>train their own personal AI</b>. Everyone deserves an AI defined by them, not by a "Big Brother."

<b>Our solution</b>
Our product, Me.bot, is designed to be a <b>personalized AI companion</b>. It learns and evolves with you, coaching you based on your unique experiences and interactions. You can easily build your memory archive with Me.bot all-compassing multimodal recognition, and Me.bot will connect the dots of your memories to inspire and support you.

<b>Key features</b>
🌟<b>Serendipity</b>: Me.bot learns from you, offering inspiration and advice when you need it most.
🧠<b>Second Brain</b>: Me.bot helps you understand yourself better and presents its insights into you.
💬<b>Speak to Remind</b>: Set reminders with your voice—it's easier than ever.
📁<b>Smart Topics<

In [29]:
[last_posts_df.iloc[12]['comments']['nodes'][0]['body'] if(last_posts_df.iloc[12]['comments']['nodes']) else None]

['🚧 **Enhance Construction Site Safety with AI** 🚧\n\nWelcome to the launch of "Construction-Hazard-Detection," your ultimate AI-driven solution for boosting safety on construction sites. Our system leverages the powerful YOLOv8 model for real-time object detection, identifying key hazards such as workers without helmets or safety vests and those in close proximity to machinery and vehicles.\n\n#### Key Features:\n- **Real-Time Detection:** Utilizes YOLOv8 for immediate hazard identification.\n- **Safety Zones:** Uses HDBSCAN to cluster safety cone coordinates, creating controlled zones and alerting when people enter these areas.\n- **Accurate Alerts:** Advanced post-processing algorithms enhance detection accuracy, ensuring reliable hazard notifications.\n- **Multi-Platform Notifications:** Integrates with LINE, Messenger, WeChat, and Telegram to send instant alerts and real-time images.\n\n#### Why Choose Us?\n- **Precision:** High detection accuracy through advanced AI and post-proc

In [30]:
top_first_comments_len = [len(top_posts[i]['node']['comments']['nodes'][0]['body']) 
                          for i in range(len(top_posts))]

last_first_comments_len = [len(last_posts[i]['node']['comments']['nodes'][0]['body']) 
                           if last_posts[i]['node']['comments']['nodes'] else 0
                           for i in range(len(last_posts))]

In [33]:
plotly_graphs('histogram',x=top_first_comments_len)

In [34]:
np.array(top_first_comments_len).mean()

1595.18

In [36]:
plotly_graphs('histogram',x=last_first_comments_len)

*As we can see from couple of example from both types of posts, the first comment from the maker is very detailed in top performing launches whereas the first comments in case of the launches with least amount of votes the comment is not very detailed and sometimes there is no comment from the maker*

## Classification Model

In [37]:
df = pd.read_csv('PH_posts_data.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,name,description,url,votesCount,createdAt,tagline,commentsCount,comments,topics
0,0,Not Diamond,Not Diamond isn’t like other chatbots you’ve u...,https://www.producthunt.com/posts/not-diamond?...,691,2024-08-01T07:01:00Z,The last chatbot you’ll ever need,219,{'nodes': [{'body': 'Hey Product Hunt!\r\n\r\n...,"{'nodes': [{'slug': 'developer-tools'}, {'slug..."
1,1,Clarity,Clarity is purpose-built for founder-led sales...,https://www.producthunt.com/posts/clarity-9f37...,545,2024-08-01T07:01:00Z,A meeting recorder for founder-led sales,199,"{'nodes': [{'body': ""Congrats on the launch Au...","{'nodes': [{'slug': 'productivity'}, {'slug': ..."
2,2,Mito Health,Mito Health uses blood work at regular labs to...,https://www.producthunt.com/posts/mito-health?...,358,2024-08-01T07:01:00Z,Better insights from bloodwork,117,{'nodes': [{'body': 'Hi Product Hunt! 👋 \n\nWe...,"{'nodes': [{'slug': 'health-fitness'}, {'slug'..."
3,3,EduWiz.AI,Improve your writing effortlessly with EduWiz....,https://www.producthunt.com/posts/eduwiz-ai?ut...,292,2024-08-01T07:01:00Z,Write magical paperwork in seconds with AI,105,"{'nodes': [{'body': 'Hey, Product Hunt! 👋\n\nI...","{'nodes': [{'slug': 'writing'}, {'slug': 'educ..."
4,4,Mind Visuals,Stay in your creator zone and edit videos in s...,https://www.producthunt.com/posts/mind-visuals...,278,2024-08-01T07:01:00Z,Drag and drop animations for creators,61,{'nodes': [{'body': 'Mind Visuals is now live!...,"{'nodes': [{'slug': 'design-tools'}, {'slug': ..."


In [38]:
df.drop('Unnamed: 0',axis=1,inplace=True)
df['Date'] = pd.to_datetime(df['createdAt']).dt.date
# df['date'] = pd.to_datetime(df['createdAt']).dt.date
df['day'] = pd.to_datetime(df['createdAt']).dt.day_name()
df['topic_list'] = df['topics'].apply(lambda x: [j['slug'] for j in ast.literal_eval(x)['nodes']])
df.head()

Unnamed: 0,name,description,url,votesCount,createdAt,tagline,commentsCount,comments,topics,Date,day,topic_list
0,Not Diamond,Not Diamond isn’t like other chatbots you’ve u...,https://www.producthunt.com/posts/not-diamond?...,691,2024-08-01T07:01:00Z,The last chatbot you’ll ever need,219,{'nodes': [{'body': 'Hey Product Hunt!\r\n\r\n...,"{'nodes': [{'slug': 'developer-tools'}, {'slug...",2024-08-01,Thursday,"[developer-tools, artificial-intelligence, bots]"
1,Clarity,Clarity is purpose-built for founder-led sales...,https://www.producthunt.com/posts/clarity-9f37...,545,2024-08-01T07:01:00Z,A meeting recorder for founder-led sales,199,"{'nodes': [{'body': ""Congrats on the launch Au...","{'nodes': [{'slug': 'productivity'}, {'slug': ...",2024-08-01,Thursday,"[productivity, sales, artificial-intelligence]"
2,Mito Health,Mito Health uses blood work at regular labs to...,https://www.producthunt.com/posts/mito-health?...,358,2024-08-01T07:01:00Z,Better insights from bloodwork,117,{'nodes': [{'body': 'Hi Product Hunt! 👋 \n\nWe...,"{'nodes': [{'slug': 'health-fitness'}, {'slug'...",2024-08-01,Thursday,"[health-fitness, artificial-intelligence, life..."
3,EduWiz.AI,Improve your writing effortlessly with EduWiz....,https://www.producthunt.com/posts/eduwiz-ai?ut...,292,2024-08-01T07:01:00Z,Write magical paperwork in seconds with AI,105,"{'nodes': [{'body': 'Hey, Product Hunt! 👋\n\nI...","{'nodes': [{'slug': 'writing'}, {'slug': 'educ...",2024-08-01,Thursday,"[writing, education, artificial-intelligence]"
4,Mind Visuals,Stay in your creator zone and edit videos in s...,https://www.producthunt.com/posts/mind-visuals...,278,2024-08-01T07:01:00Z,Drag and drop animations for creators,61,{'nodes': [{'body': 'Mind Visuals is now live!...,"{'nodes': [{'slug': 'design-tools'}, {'slug': ...",2024-08-01,Thursday,"[design-tools, marketing, video]"


Generally anything above 200 upvotes on Product Hunt has a chance of becoming product of the day and 200-300 upvotes is considered a good lauch.

In [40]:
df[df['votesCount']>200].shape

(239, 12)

In [41]:
def categorize_votes(vote):
    if vote > 150:
        return 'Success'
    elif vote > 50:
        return 'Average'
    else:
        return 'low'

df['vote_category'] = df['votesCount'].apply(categorize_votes)

In [42]:
first_comments=[ast.literal_eval(df['comments'][i])['nodes'][0]['body'] 
 if len(ast.literal_eval(df['comments'][i])['nodes']) else None
 for i in range(len(df))]

In [43]:
df['first_comments'] = first_comments
df['first_comments_len']=[len(first_comments[i]) if first_comments[i] else 0 
                          for i in range(len(df['first_comments']))]

In [44]:
ast.literal_eval(df['topics'][0])['nodes']

[{'slug': 'developer-tools'},
 {'slug': 'artificial-intelligence'},
 {'slug': 'bots'}]

In [45]:
df[['votesCount','commentsCount','first_comments_len']].corr()

Unnamed: 0,votesCount,commentsCount,first_comments_len
votesCount,1.0,0.701128,0.246992
commentsCount,0.701128,1.0,0.298725
first_comments_len,0.246992,0.298725,1.0


In [46]:
f'Number of topics: {len(list(sorted(set(df['topic_list'].explode().values))))}'

'Number of topics: 295'

In [47]:
mlb = MultiLabelBinarizer()
topics_one_hot = mlb.fit_transform(df['topic_list'])

In [51]:
plotly_graphs('histogram',
              x=[len(top_posts_df['description'][i]) for i in range(len(top_posts_df))],
              title='Length of description of Top Posts')

In [53]:
plotly_graphs('histogram',
              x=[len(last_posts_df['description'][i]) for i in range(len(top_posts_df))],
              title='Length of description of Top Posts')

In [54]:
topics_one_hot_df = pd.DataFrame(topics_one_hot, columns=mlb.classes_)
df = pd.concat([df, topics_one_hot_df], axis=1)
df.head()

Unnamed: 0,name,description,url,votesCount,createdAt,tagline,commentsCount,comments,topics,Date,...,weather,web-app,web-design,web3,website-builder,wi-fi,word-games,wordpress,writing,youtube
0,Not Diamond,Not Diamond isn’t like other chatbots you’ve u...,https://www.producthunt.com/posts/not-diamond?...,691,2024-08-01T07:01:00Z,The last chatbot you’ll ever need,219,{'nodes': [{'body': 'Hey Product Hunt!\r\n\r\n...,"{'nodes': [{'slug': 'developer-tools'}, {'slug...",2024-08-01,...,0,0,0,0,0,0,0,0,0,0
1,Clarity,Clarity is purpose-built for founder-led sales...,https://www.producthunt.com/posts/clarity-9f37...,545,2024-08-01T07:01:00Z,A meeting recorder for founder-led sales,199,"{'nodes': [{'body': ""Congrats on the launch Au...","{'nodes': [{'slug': 'productivity'}, {'slug': ...",2024-08-01,...,0,0,0,0,0,0,0,0,0,0
2,Mito Health,Mito Health uses blood work at regular labs to...,https://www.producthunt.com/posts/mito-health?...,358,2024-08-01T07:01:00Z,Better insights from bloodwork,117,{'nodes': [{'body': 'Hi Product Hunt! 👋 \n\nWe...,"{'nodes': [{'slug': 'health-fitness'}, {'slug'...",2024-08-01,...,0,0,0,0,0,0,0,0,0,0
3,EduWiz.AI,Improve your writing effortlessly with EduWiz....,https://www.producthunt.com/posts/eduwiz-ai?ut...,292,2024-08-01T07:01:00Z,Write magical paperwork in seconds with AI,105,"{'nodes': [{'body': 'Hey, Product Hunt! 👋\n\nI...","{'nodes': [{'slug': 'writing'}, {'slug': 'educ...",2024-08-01,...,0,0,0,0,0,0,0,0,1,0
4,Mind Visuals,Stay in your creator zone and edit videos in s...,https://www.producthunt.com/posts/mind-visuals...,278,2024-08-01T07:01:00Z,Drag and drop animations for creators,61,{'nodes': [{'body': 'Mind Visuals is now live!...,"{'nodes': [{'slug': 'design-tools'}, {'slug': ...",2024-08-01,...,0,0,0,0,0,0,0,0,0,0


In [55]:
daytoint = {'Monday': 1,
 'Tuesday': 2,
 'Wednesday': 3,
 'Thursday': 4,
 'Friday': 5,
 'Saturday': 6,
 'Sunday': 7,
}

vote_category_dict ={'low':0,
    'Average':1,
 'Success':2}

In [56]:
df['daytoint']= df['day'].map(daytoint)
df['description_len']= df['description'].apply(lambda i: len(i))

In [57]:
x= df.drop(list(df.dtypes[df.dtypes==object].index)+['votesCount','commentsCount'],axis=1)
y = df['vote_category']
x.shape,y.shape

((1860, 298), (1860,))

In [58]:
x_train,x_test,y_train,y_test = train_test_split(x,y.map(vote_category_dict),test_size = 0.2)
x_train.shape,y_train.shape,x_test.shape,y_test.shape

((1488, 298), (1488,), (372, 298), (372,))

In [59]:
y.map(vote_category_dict).value_counts()

vote_category
0    1060
1     458
2     342
Name: count, dtype: int64

In [60]:
xgb= XGBClassifier(max_depth =3,max_leaves =4,learning_rate =0.1)
xgb.fit(x_train,y_train)

In [61]:
print(f'Training Accuracy: {accuracy_score(xgb.predict(x_train),y_train)}')
print(f'Training Confusion matrix: {confusion_matrix(xgb.predict(x_train),y_train)}')

Training Accuracy: 0.6397849462365591
Training Confusion matrix: [[814 263 183]
 [ 30  82  20]
 [ 21  19  56]]


In [62]:
print(f'Test Accuracy: {accuracy_score(xgb.predict(x_test),y_test)}')
print(f'Test Confusion matrix: {confusion_matrix(xgb.predict(x_test),y_test)}')

Test Accuracy: 0.5403225806451613
Test Confusion matrix: [[181  76  66]
 [ 12   9   6]
 [  2   9  11]]


In [63]:
print(classification_report(xgb.predict(x_test),y_test))

              precision    recall  f1-score   support

           0       0.93      0.56      0.70       323
           1       0.10      0.33      0.15        27
           2       0.13      0.50      0.21        22

    accuracy                           0.54       372
   macro avg       0.39      0.46      0.35       372
weighted avg       0.82      0.54      0.63       372

